Skip to main content

mockforge_bench/conformance/
self_test.rs

1//! Positive + per-category negative request driver against a live server.
2//!
3//! Issue #79 round 13 (4) — Srikanth's (e) ask: a way to test both
4//! positive and negative compliance scenarios separately, where the
5//! positive cases should pass and the negative cases should be
6//! rejected.
7//!
8//! This module sits *alongside* the existing conformance executor
9//! (which drives k6 / native checks on a single positive call per
10//! operation). The self-test driver synthesises per-category
11//! deliberately-bad requests and asserts that the server actually
12//! rejects them with a 4xx — useful when verifying that
13//! `validate_request_with_all` is wired correctly for the user's spec
14//! (the exact gap that round-13 (3) fixed).
15//!
16//! Scope of the initial MVP: covers the highest-signal negatives —
17//! empty body when one is required, missing required query/header
18//! params, and wrong-type path params. Doesn't try to mutate every
19//! field of a JSON-Schema-validated body; that's a follow-up.
20
21use super::spec_driven::{AnnotatedOperation, ApiKeyLocation, SecuritySchemeInfo};
22use reqwest::{Client, Method};
23use std::collections::BTreeMap;
24use std::net::IpAddr;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
/// Round 23 (c-iii) — per-direction body cap when capturing
/// request/response payloads to `conformance-self-test-requests.jsonl`.
/// The cap applies independently to the request body and to the
/// response body of each probe. 16 KiB keeps a 1000-case run under
/// ~32 MB (1000 cases × 2 directions × 16 KiB) even if every payload
/// fills the cap, while still preserving enough of a typical JSON body
/// (or a stack-trace error response) to debug from.
const CAPTURE_BODY_CAP_BYTES: usize = 16 * 1024;
35
/// Round 17.2 — cap on schema-driven negatives per operation. A spec
/// with 100 properties per body could produce hundreds of mutations
/// for a single operation; combined with thousands of operations
/// that's a runaway test matrix. 12 covers the highest-signal
/// mutations (type mismatch + required-removed + a few constraint
/// breaks) without exploding wall time on large specs.
///
/// The cap is applied by taking the first `SCHEMA_MUTATION_CAP`
/// mutations in the order the mutator returns them.
const SCHEMA_MUTATION_CAP: usize = 12;
43
/// Round 25 (k) — content-type swap probes. For operations declaring a
/// JSON request body, each entry below produces one probe that lies
/// about Content-Type while keeping the JSON payload. A spec-compliant
/// server should respond 415 (or 400). Order matches the order
/// Srikanth listed in his round-23 reply: XML, YAML, multipart, and
/// the URL-encoded variant he added in round 24.
///
/// Each entry is a `(content_type, label)` pair: the first element is
/// the Content-Type header value sent with the probe, the second is the
/// case label recorded for it.
const CONTENT_TYPE_SWAP_VARIANTS: &[(&str, &str)] = &[
    ("application/xml", "request-body:content-type-mismatch:xml"),
    ("application/yaml", "request-body:content-type-mismatch:yaml"),
    ("multipart/form-data", "request-body:content-type-mismatch:multipart"),
    (
        "application/x-www-form-urlencoded",
        "request-body:content-type-mismatch:urlencoded",
    ),
];
59
/// Round 27 (k variant b) — embedded content payloads. Content-Type
/// stays `application/json` and the envelope IS valid JSON; we just
/// stuff a non-JSON snippet into a string field's value. The test
/// surfaces servers that try to parse string field contents (e.g.
/// XML-EE expanders, YAML loaders, urlencoded parsers) and crash on
/// the payload — a 5xx here is the finding.
///
/// Each entry is a `(label, payload)` pair. Note that the label comes
/// FIRST here, which is the opposite tuple order to
/// `CONTENT_TYPE_SWAP_VARIANTS`.
const EMBEDDED_CONTENT_VARIANTS: &[(&str, &str)] = &[
    ("request-body:embedded-content:xml", "<root><cmd>execute()</cmd></root>"),
    ("request-body:embedded-content:yaml", "key: value\n- item1\n- item2"),
    (
        "request-body:embedded-content:multipart",
        "--boundary\r\nContent-Disposition: form-data; name=\"x\"\r\n\r\nval\r\n--boundary--",
    ),
    ("request-body:embedded-content:urlencoded", "a=1&b=2&c=hello%20world"),
];
75
/// Configuration for a self-test run.
#[derive(Debug, Clone)]
pub struct SelfTestConfig {
    /// Base URL of the server under test; every probe URL is built from
    /// it (together with `base_path` and the operation's path).
    pub target_url: String,
    /// When true, the HTTP clients are built with
    /// `danger_accept_invalid_certs(true)`, i.e. TLS certificate
    /// validation is disabled.
    pub skip_tls_verify: bool,
    /// Timeout configured on every HTTP client in the pool.
    pub timeout: Duration,
    /// Optional extra headers to attach to every request (e.g. auth).
    pub extra_headers: Vec<(String, String)>,
    /// Delay between requests to avoid hammering the server.
    /// NOTE: `run_self_test` sleeps for this duration once after each
    /// *operation* (i.e. after all of that operation's probes), not
    /// between the individual probes.
    pub delay_between_requests: Duration,
    /// Round 18.1 — base path to prepend to every spec path. When the
    /// spec declares `/users` and the deployed API is served under
    /// `/api`, `--base-path /api` should make the self-test hit
    /// `https://target/api/users` instead of `https://target/users`.
    /// Pre-fix this was ignored entirely and every operation 404'd
    /// (Srikanth's vCenter run on 0.3.152: 1275 positives, 1275 4xx).
    pub base_path: Option<String>,
    /// Round 18.5 — local source IPs to bind outgoing requests to.
    /// Each IP must already be assigned to an interface on the host.
    /// Operations round-robin through the resulting client pool.
    pub source_ips: Vec<IpAddr>,
    /// Round 18.5 — fake source IPs to advertise via forwarded-IP
    /// headers (used to exercise GEODB lookup at the destination).
    /// Rotated per operation.
    pub geo_source_ips: Vec<IpAddr>,
    /// Which forwarded-IP header(s) to populate when `geo_source_ips`
    /// is non-empty. Empty → no-op; the `Default` impl sets the
    /// three-header set returned by `default_geo_source_headers`.
    pub geo_source_headers: Vec<String>,
    /// Round 23 (c-iii) — when `Some`, every probe captures method, URL,
    /// request headers/body and response status/headers/body into this
    /// sink. Caller drains it after `run_self_test` and writes
    /// `conformance-self-test-requests.jsonl`. None → no capture (zero
    /// extra allocations on the hot path).
    pub capture: Option<Arc<Mutex<Vec<CaseCapture>>>>,
    /// Round 25 — when true, validate every probe's response body
    /// against the spec's response schema for the actual status
    /// returned (closes round 21.3 / Srikanth's a2 / a3 ask). The
    /// validation result lands in `CaseCapture::response_schema_error`
    /// (None → matched, or no schema for that status). Default false:
    /// JSON-Schema validation of large response bodies adds wall-clock
    /// time and the user has to opt in.
    pub validate_response_schemas: bool,
}
120
/// Round 23 (c-iii) — one captured request/response pair, one per
/// probe (positive or negative). Serialised as a JSON line in
/// `conformance-self-test-requests.jsonl`. Headers are kept as
/// `BTreeMap` for stable ordering. Bodies are truncated to
/// `CAPTURE_BODY_CAP_BYTES`; `*_truncated` flags whether more was
/// dropped.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseCapture {
    /// Case label of the probe (e.g. `positive`, `request-body:empty`).
    pub label: String,
    /// HTTP method of the request.
    pub method: String,
    /// URL the probe was sent to.
    pub url: String,
    /// Request headers, keyed by header name.
    pub request_headers: BTreeMap<String, String>,
    /// Request body, if any, truncated to the capture cap.
    pub request_body: Option<String>,
    /// True when `request_body` was cut short by the capture cap.
    pub request_body_truncated: bool,
    /// HTTP status code of the response.
    pub response_status: u16,
    /// Response headers, keyed by header name.
    pub response_headers: BTreeMap<String, String>,
    /// Response body, if any, truncated to the capture cap.
    pub response_body: Option<String>,
    /// True when `response_body` was cut short by the capture cap.
    pub response_body_truncated: bool,
    // NOTE(review): presumably the transport-level error message when
    // the request could not be completed — confirm against the code
    // that fills the capture sink.
    pub error: Option<String>,
    /// Round 25 — when `validate_response_schemas` is on and the spec
    /// declares a schema for `response_status`, this carries the
    /// validation message (or None when the body matched, or no schema
    /// was declared for that status). Serialised verbatim in the JSONL
    /// and rendered in the HTML viewer. Omitted from the serialised
    /// output entirely when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_schema_error: Option<String>,
}
148
149impl Default for SelfTestConfig {
150    fn default() -> Self {
151        Self {
152            target_url: "http://localhost:3000".into(),
153            skip_tls_verify: false,
154            timeout: Duration::from_secs(15),
155            extra_headers: Vec::new(),
156            delay_between_requests: Duration::from_millis(0),
157            base_path: None,
158            source_ips: Vec::new(),
159            geo_source_ips: Vec::new(),
160            geo_source_headers: default_geo_source_headers(),
161            capture: None,
162            validate_response_schemas: false,
163        }
164    }
165}
166
167/// Truncate `body` to `CAPTURE_BODY_CAP_BYTES` on a UTF-8 boundary,
168/// returning the trimmed string and whether truncation occurred. Used
169/// for both request and response bodies in the capture sink.
170fn truncate_body_for_capture(body: &str) -> (String, bool) {
171    if body.len() <= CAPTURE_BODY_CAP_BYTES {
172        return (body.to_string(), false);
173    }
174    let mut end = CAPTURE_BODY_CAP_BYTES;
175    while end > 0 && !body.is_char_boundary(end) {
176        end -= 1;
177    }
178    (body[..end].to_string(), true)
179}
180
/// Default forwarded-IP header set. Covers three conventions a real
/// GEODB front-end is likely to read: the de-facto standard
/// `X-Forwarded-For`, `True-Client-IP` (Akamai/CloudFront) and
/// Cloudflare's `CF-Connecting-IP`.
///
/// The returned list is in exactly that order (`X-Forwarded-For`
/// first); it is the order in which the headers are appended to a
/// request, not a statement about which one a front-end prefers.
/// Override via `--geo-source-header` to test a specific stack.
pub fn default_geo_source_headers() -> Vec<String> {
    const NAMES: [&str; 3] = ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"];
    NAMES.iter().map(|name| name.to_string()).collect()
}
194
/// Outcome of a single test case (positive or negative).
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseOutcome {
    /// Case label, e.g. `positive` or `request-body:empty`. For
    /// negatives, `run_self_test` uses the text before the first `:` as
    /// the reporting category.
    pub label: String,
    /// Whether the probe expected the server to reject the request
    /// with a 4xx.
    pub expected_4xx: bool,
    /// HTTP status code recorded for the probe.
    pub actual_status: u16,
    /// True when the response status matches expectation
    /// (positive → 2xx-3xx, negative → 4xx).
    pub passed: bool,
}
205
/// All cases run against one annotated operation.
#[derive(Debug, Clone, serde::Serialize)]
pub struct OperationResult {
    /// HTTP method of the operation.
    pub method: String,
    /// Path of the operation.
    pub path: String,
    /// Outcome of the positive (spec-conformant) probe, if one was run.
    pub positive: Option<CaseOutcome>,
    /// Outcomes of every negative probe run for this operation.
    pub negatives: Vec<CaseOutcome>,
}
214
/// Summary report rolled up across all operations.
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct SelfTestReport {
    /// Number of operations whose positive case passed.
    pub positive_pass: usize,
    /// Number of operations whose positive case did not pass.
    pub positive_fail: usize,
    /// Per category: count of negative cases the server correctly
    /// rejected with a 4xx (we caught the spec violation).
    pub negative_caught: BTreeMap<String, usize>,
    /// Per category: count of negative cases that should have been
    /// rejected but came back with a non-4xx (validator gap).
    pub negative_missed: BTreeMap<String, usize>,
    /// Per-operation results, in the order the operations were run.
    pub operations: Vec<OperationResult>,
}
228
229impl SelfTestReport {
230    /// All-pass means every positive case got 2xx-3xx and every
231    /// negative case got 4xx.
232    pub fn all_passed(&self) -> bool {
233        self.positive_fail == 0 && self.negative_missed.values().sum::<usize>() == 0
234    }
235
236    /// Round 18.1 — detect the "self-test target is misconfigured"
237    /// case where every positive failed with the *same* status code.
238    /// The classic example: `--base-path /api` was forgotten so every
239    /// request hits a path the server doesn't know and returns 404.
240    /// Pre-warning, the user saw all-green negative buckets (because
241    /// "missing route" 404s look like "validator rejected") and no
242    /// indication that the run was meaningless. Returns Some(status)
243    /// when ≥10 positives all failed with the same status, else None.
244    pub fn detect_target_misconfiguration(&self) -> Option<u16> {
245        if self.positive_pass > 0 || self.positive_fail < 10 {
246            return None;
247        }
248        let mut seen: Option<u16> = None;
249        for op in &self.operations {
250            let Some(p) = &op.positive else {
251                continue;
252            };
253            if p.passed {
254                return None;
255            }
256            match seen {
257                None => seen = Some(p.actual_status),
258                Some(s) if s != p.actual_status => return None,
259                _ => {}
260            }
261        }
262        seen
263    }
264
265    /// Human-readable summary string. One line for positives, one per
266    /// category for negatives. Designed to slot into existing
267    /// `TerminalReporter` output.
268    pub fn render_summary(&self) -> String {
269        let mut out = String::new();
270        out.push_str(&format!(
271            "Positives: {} pass / {} fail\n",
272            self.positive_pass, self.positive_fail
273        ));
274        let mut keys: Vec<&String> =
275            self.negative_caught.keys().chain(self.negative_missed.keys()).collect();
276        keys.sort();
277        keys.dedup();
278        for cat in keys {
279            let caught = self.negative_caught.get(cat).copied().unwrap_or(0);
280            let missed = self.negative_missed.get(cat).copied().unwrap_or(0);
281            let mark = if missed == 0 { "✓" } else { "⚠" };
282            out.push_str(&format!(
283                "Negatives [{}]: {} caught / {} missed  {}\n",
284                cat, caught, missed, mark
285            ));
286        }
287        out
288    }
289}
290
291/// Execute the self-test plan against `config.target_url` for every
292/// `AnnotatedOperation`. Returns the aggregated report; callers
293/// decide how to display it (e.g. via `render_summary` or by writing
294/// the JSON serialisation to disk).
295pub async fn run_self_test(
296    operations: &[AnnotatedOperation],
297    config: &SelfTestConfig,
298) -> Result<SelfTestReport, reqwest::Error> {
299    // Round 18.5 — build a client pool when `source_ips` is set,
300    // one reqwest::Client per IP, each bound to its local address.
301    // Operations round-robin through the pool. Empty pool → single
302    // default client (the pre-18.5 behaviour).
303    let clients = build_client_pool(config)?;
304    let client_cursor = AtomicUsize::new(0);
305    let geo_cursor = AtomicUsize::new(0);
306
307    let mut report = SelfTestReport::default();
308    for op in operations {
309        let client_idx = client_cursor.fetch_add(1, Ordering::Relaxed) % clients.len();
310        let client = &clients[client_idx];
311        let geo_ip = if config.geo_source_ips.is_empty() {
312            None
313        } else {
314            let idx = geo_cursor.fetch_add(1, Ordering::Relaxed) % config.geo_source_ips.len();
315            Some(config.geo_source_ips[idx])
316        };
317        let result = test_operation(client, config, op, geo_ip).await;
318        if let Some(p) = &result.positive {
319            if p.passed {
320                report.positive_pass += 1;
321            } else {
322                report.positive_fail += 1;
323            }
324        }
325        for neg in &result.negatives {
326            let cat = neg.label.split(':').next().unwrap_or("other").to_string();
327            if neg.passed {
328                *report.negative_caught.entry(cat).or_insert(0) += 1;
329            } else {
330                *report.negative_missed.entry(cat).or_insert(0) += 1;
331            }
332        }
333        report.operations.push(result);
334        if !config.delay_between_requests.is_zero() {
335            tokio::time::sleep(config.delay_between_requests).await;
336        }
337    }
338    Ok(report)
339}
340
/// Round 18.5 — build the header list actually sent for an operation:
/// the operation's declared headers plus one forwarded-IP header per
/// entry in `geo_headers`, each carrying `geo_ip` rendered as text.
///
/// When `geo_ip` is `None` or `geo_headers` is empty the result is
/// simply a copy of `base`.
///
/// A geo header is skipped when a header of the same name (compared
/// ASCII-case-insensitively) is already in the list, so a value
/// declared by the operation's spec always wins, and a name repeated
/// in `geo_headers` is only added once.
fn effective_op_headers(
    base: &[(String, String)],
    geo_ip: Option<IpAddr>,
    geo_headers: &[String],
) -> Vec<(String, String)> {
    let mut headers = base.to_vec();
    if let Some(addr) = geo_ip {
        let advertised = addr.to_string();
        for name in geo_headers {
            let already_present = headers
                .iter()
                .any(|(existing, _)| existing.eq_ignore_ascii_case(name));
            if !already_present {
                headers.push((name.clone(), advertised.clone()));
            }
        }
    }
    headers
}
368
369/// Round 18.5 — build a pool of reqwest clients, one per declared
370/// source IP. Empty `source_ips` → a single default client.
371///
372/// The OS must already have each `source_ip` assigned to an
373/// interface; reqwest's `.local_address()` issues a `bind()` syscall
374/// at connect time, so an IP the kernel doesn't recognise surfaces
375/// as `EADDRNOTAVAIL` at request time, not at builder time.
376fn build_client_pool(config: &SelfTestConfig) -> Result<Vec<Client>, reqwest::Error> {
377    let make = |bind: Option<IpAddr>| -> Result<Client, reqwest::Error> {
378        let mut builder = Client::builder().timeout(config.timeout);
379        if config.skip_tls_verify {
380            builder = builder.danger_accept_invalid_certs(true);
381        }
382        if let Some(addr) = bind {
383            builder = builder.local_address(addr);
384        }
385        builder.build()
386    };
387    if config.source_ips.is_empty() {
388        Ok(vec![make(None)?])
389    } else {
390        config.source_ips.iter().map(|ip| make(Some(*ip))).collect()
391    }
392}
393
394async fn test_operation(
395    client: &Client,
396    config: &SelfTestConfig,
397    op: &AnnotatedOperation,
398    geo_ip: Option<IpAddr>,
399) -> OperationResult {
400    // Round 25 — track the sink length BEFORE we run any probes for
401    // this operation, so that after the probes finish we can mutate
402    // exactly the entries that belong to this op (the capture sink is
403    // shared but `run_self_test` iterates operations sequentially).
404    // Used by the response-schema validation pass below.
405    let sink_start = config.capture.as_ref().and_then(|s| s.lock().ok().map(|g| g.len()));
406
407    let url = build_url_with_base(
408        &config.target_url,
409        config.base_path.as_deref(),
410        &op.path,
411        &op.path_params,
412    );
413    let method = Method::from_bytes(op.method.to_uppercase().as_bytes()).unwrap_or(Method::GET);
414
415    // Round 18.5 — pre-compute the operation's effective headers
416    // with the geo source IP baked in. Doing it once here keeps the
417    // per-case `send_case` calls below unchanged. When `geo_ip` is
418    // None the result equals `op.header_params`.
419    let op_headers = effective_op_headers(&op.header_params, geo_ip, &config.geo_source_headers);
420
421    // ── Positive case ────────────────────────────────────────────
422    let positive = send_case(
423        client,
424        config,
425        method.clone(),
426        &url,
427        "positive",
428        false,
429        op.sample_body.as_deref(),
430        op.query_params.clone(),
431        op_headers.clone(),
432    )
433    .await;
434
435    // ── Negative cases ───────────────────────────────────────────
436    let mut negatives = Vec::new();
437
438    // (a) empty body when one is required.
439    //
440    // Round 16 — drop the `sample_body.is_some()` precondition. Operations
441    // whose body annotator couldn't synthesize a sample previously got
442    // zero negatives (so the self-test reported "all passing" even on
443    // POST /resource with a required body). The spec saying the operation
444    // *has* a request body is enough — an empty object is a valid
445    // negative regardless of whether we have a positive sample.
446    if op.request_body_content_type.is_some() {
447        negatives.push(
448            send_case(
449                client,
450                config,
451                method.clone(),
452                &url,
453                "request-body:empty",
454                true,
455                Some("{}"),
456                op.query_params.clone(),
457                op_headers.clone(),
458            )
459            .await,
460        );
461
462        // (b) wrong-shaped body (array instead of object) — exercises
463        // top-level type validation independently of which fields are
464        // required.
465        negatives.push(
466            send_case(
467                client,
468                config,
469                method.clone(),
470                &url,
471                "request-body:wrong-type",
472                true,
473                Some("[]"),
474                op.query_params.clone(),
475                op_headers.clone(),
476            )
477            .await,
478        );
479
480        // Round 25 (k) — content-type swap probes.
481        //
482        // For operations declaring `application/json` request bodies, send
483        // the SAME json payload (or a synthesised one) under four other
484        // content types: `application/xml`, `application/yaml`,
485        // `multipart/form-data`, `application/x-www-form-urlencoded`.
486        // The spec says the endpoint accepts only JSON, so a strict server
487        // should respond 415 Unsupported Media Type (or 400 if it tries
488        // to parse and fails). A 2xx means the server is accepting
489        // payloads outside its declared content negotiation, which is the
490        // failure mode behind a lot of "we crashed on a malformed XML
491        // upload" incidents.
492        //
493        // Variant (a) of Srikanth's round-23 g ask: lie about the
494        // Content-Type header. The body shape is honest JSON; only the
495        // header is swapped. Variant (b) (JSON envelope with embedded
496        // non-JSON field values) is deferred to round 26 because it
497        // requires a schema-aware field walker.
498        if op
499            .request_body_content_type
500            .as_deref()
501            .map(|ct| ct.contains("json"))
502            .unwrap_or(false)
503        {
504            let payload = op.sample_body.as_deref().unwrap_or("{}");
505            for (ct, label) in CONTENT_TYPE_SWAP_VARIANTS {
506                negatives.push(
507                    send_case_with_extra(
508                        client,
509                        config,
510                        method.clone(),
511                        &url,
512                        label,
513                        true,
514                        Some(payload),
515                        op.query_params.clone(),
516                        // Strip any Content-Type already on the operation
517                        // headers (the spec's positive value) so the
518                        // probe's value is the only one the server sees.
519                        op_headers
520                            .iter()
521                            .filter(|(k, _)| !k.eq_ignore_ascii_case("content-type"))
522                            .cloned()
523                            .collect(),
524                        // The wrong Content-Type rides on `extra_headers`
525                        // so it lands AFTER `send_case_with_extra`'s
526                        // unconditional `application/json` insertion in
527                        // request-body mode. Actually `send_case_with_extra`
528                        // only sets Content-Type when a body is present
529                        // AND there's no manual override; passing the
530                        // override here wins because reqwest preserves
531                        // the last-set header value.
532                        vec![("Content-Type".to_string(), (*ct).to_string())],
533                    )
534                    .await,
535                );
536            }
537
538            // Round 27 (k variant b) — embedded non-JSON content
539            // inside a valid JSON envelope. Content-Type stays
540            // application/json (honest) and the body parses as JSON;
541            // only the string-valued payload changes. We expect 2xx-3xx
542            // because the envelope is spec-shape, so the probe surfaces
543            // servers that crash (5xx) trying to parse the embedded
544            // snippet as XML/YAML/etc. A 4xx is also a finding because
545            // it usually means the server's pattern/format validator
546            // tripped on the payload contents, but the user can decide
547            // from the JSONL whether that's a bug or correct narrow-
548            // string-field behaviour.
549            for (label, snippet) in EMBEDDED_CONTENT_VARIANTS {
550                let payload = op.sample_body.as_deref().unwrap_or("{}");
551                let body = embed_payload_in_first_string_field(payload, snippet);
552                negatives.push(
553                    send_case(
554                        client,
555                        config,
556                        method.clone(),
557                        &url,
558                        label,
559                        // expected_4xx=false: any non-2xx is a probe
560                        // failure. 5xx in particular is "server panicked
561                        // on the embedded content".
562                        false,
563                        Some(&body),
564                        op.query_params.clone(),
565                        op_headers.clone(),
566                    )
567                    .await,
568                );
569            }
570        }
571
572        // Round 17.2 — schema-aware negatives.
573        //
574        // When both a positive sample AND the resolved body schema are
575        // available, mutate the sample per-field (type mismatch,
576        // min/max bounds, pattern, enum out-of-range, required-field
577        // removal) and assert each is rejected with 4xx. Capped at
578        // SCHEMA_MUTATION_CAP per operation so a 100-property body
579        // doesn't explode the test matrix.
580        if let (Some(sample_str), Some(schema)) =
581            (op.sample_body.as_deref(), op.request_body_schema.as_ref())
582        {
583            if let Ok(sample) = serde_json::from_str::<serde_json::Value>(sample_str) {
584                let mutations = super::schema_mutator::mutate_body(&sample, schema);
585                for m in mutations.into_iter().take(SCHEMA_MUTATION_CAP) {
586                    let body_str = serde_json::to_string(&m.body).unwrap_or_default();
587                    negatives.push(
588                        send_case(
589                            client,
590                            config,
591                            method.clone(),
592                            &url,
593                            &m.label,
594                            true,
595                            Some(&body_str),
596                            op.query_params.clone(),
597                            // Round 24 (f) — was `op.header_params`, which
598                            // skipped the geo-IP header. Use `op_headers`
599                            // so the geo IP rides with the negative probe
600                            // too (positive vs negative coverage must be
601                            // symmetric, otherwise a GEODB front-end sees
602                            // the rotating IP only on positives).
603                            op_headers.clone(),
604                        )
605                        .await,
606                    );
607                }
608            }
609        }
610    }
611
612    // Round 17.2 — URI-length probe. Spec-agnostic but schema-aware in
613    // spirit: most servers cap URIs at 8 KB or so. Append a 9 KB query
614    // string to the URL and expect 414 URI Too Long (or 400). Skipped
615    // for operations that already have a heavy positive query.
616    {
617        let pad = "p=".to_string() + &"x".repeat(9_000);
618        let bad_url = if url.contains('?') {
619            format!("{url}&{pad}")
620        } else {
621            format!("{url}?{pad}")
622        };
623        negatives.push(
624            send_case(
625                client,
626                config,
627                method.clone(),
628                &bad_url,
629                "parameters:uri-too-long",
630                true,
631                op.sample_body.as_deref(),
632                op.query_params.clone(),
633                // Round 24 (f) — see schema-mutation note above. Use
634                // `op_headers` (carries geo IP) instead of bare
635                // `op.header_params`.
636                op_headers.clone(),
637            )
638            .await,
639        );
640    }
641
642    // (e) Round 16 — path-param type probe. Send the first path
643    // parameter as a literal `"self-test-invalid-id"`: a string that
644    // contains hyphens, won't parse as an integer, won't parse as a
645    // UUID, and won't match any typical regex pattern. Operations
646    // whose spec types the param as `integer` or `string` with a
647    // `format`/`pattern` will catch this (caught: server returned
648    // 4xx); operations whose spec lets path params be free-form
649    // strings will let it through (missed: server returned 2xx).
650    // Either outcome is informative: a category that's all "missed"
651    // tells the user their spec is loose on path-param types, which
652    // is itself worth knowing. Addresses Srikanth's "always all
653    // passing" report — operations with a path param now produce at
654    // least one probe instead of zero.
655    if !op.path_params.is_empty() {
656        let mut url_with_placeholder = op.path.clone();
657        if let Some((first_name, _)) = op.path_params.first() {
658            // Substitute every other path-param with its sample so the
659            // route shape stays intact and only the first param is bad.
660            for (name, value) in op.path_params.iter().skip(1) {
661                if !value.is_empty() {
662                    url_with_placeholder =
663                        url_with_placeholder.replace(&format!("{{{name}}}"), value);
664                }
665            }
666            // Substitute the first param with a guaranteed-invalid
667            // sentinel that's unlikely to match any reasonable schema:
668            // contains characters disallowed in numeric IDs *and* UUIDs.
669            url_with_placeholder =
670                url_with_placeholder.replace(&format!("{{{first_name}}}"), "self-test-invalid-id");
671            // Round 18.1 — honour `base_path` here too, otherwise the
672            // probe URL differs from the positive case and the
673            // resulting 404 is misattributed to "bad path param".
674            let bad_url = build_url_with_base(
675                &config.target_url,
676                config.base_path.as_deref(),
677                &url_with_placeholder,
678                &[],
679            );
680            negatives.push(
681                send_case(
682                    client,
683                    config,
684                    method.clone(),
685                    &bad_url,
686                    "parameters:bad-path-param",
687                    true,
688                    op.sample_body.as_deref(),
689                    op.query_params.clone(),
690                    op_headers.clone(),
691                )
692                .await,
693            );
694        }
695    }
696
697    // (c) drop the first required query param
698    if !op.query_params.is_empty() {
699        let mut q = op.query_params.clone();
700        q.remove(0);
701        negatives.push(
702            send_case(
703                client,
704                config,
705                method.clone(),
706                &url,
707                "parameters:missing-query",
708                true,
709                op.sample_body.as_deref(),
710                q,
711                op_headers.clone(),
712            )
713            .await,
714        );
715    }
716
717    // (s) Round 17.3 — security probes.
718    //
719    // Operations whose spec declares a security requirement get a
720    // dedicated set of negatives. The point isn't to test whether the
721    // server's *real* auth works (the positive case already does that
722    // via `extra_headers`) — it's to check whether deliberately-bad
723    // credentials are still rejected, which is exactly the failure
724    // mode that lets an attacker through a half-wired validator.
725    //
726    // Each probe replaces or omits the relevant auth credential and
727    // expects 401 / 403. A 2xx here is a hard finding: "spec says
728    // this endpoint is protected, server let unauthenticated /
729    // wrong-credential traffic through".
730    //
731    // Bounded: at most one probe per declared scheme kind, so an
732    // operation with 3 security requirements doesn't 4× the request
733    // volume. Skips entirely when `op.security_schemes` is empty.
734    for probe in build_security_probes(&op.security_schemes) {
735        // Strip any pre-existing Authorization or known API-key
736        // header from extra_headers + header_params so the probe
737        // value is the *only* credential the server sees.
738        let stripped_extra = strip_auth(&config.extra_headers, &op.security_schemes);
739        let stripped_headers = strip_auth(&op.header_params, &op.security_schemes);
740        let stripped_query = strip_auth_query(&op.query_params, &op.security_schemes);
741        let mut req_headers = stripped_headers;
742        for (k, v) in &probe.headers {
743            req_headers.push((k.clone(), v.clone()));
744        }
745        // Round 24 (f) — security probes build req_headers from
746        // `op.header_params` directly (we need the stripped-auth
747        // variant), so the geo-IP header doesn't ride along
748        // automatically. Append it here so a GEODB / WAF in front
749        // of the auth layer still sees the rotating source IP.
750        if let Some(ip) = geo_ip {
751            let ip_str = ip.to_string();
752            for h in &config.geo_source_headers {
753                let already = req_headers.iter().any(|(k, _)| k.eq_ignore_ascii_case(h));
754                if !already {
755                    req_headers.push((h.clone(), ip_str.clone()));
756                }
757            }
758        }
759        let mut req_query = stripped_query;
760        for (k, v) in &probe.query {
761            req_query.push((k.clone(), v.clone()));
762        }
763        negatives.push(
764            send_case_with_extra(
765                client,
766                config,
767                method.clone(),
768                &url,
769                &probe.label,
770                true,
771                op.sample_body.as_deref(),
772                req_query,
773                req_headers,
774                stripped_extra,
775            )
776            .await,
777        );
778    }
779
780    // (d) drop the first required header
781    if !op.header_params.is_empty() {
782        // Round 24 (f) — start from `op_headers` (so the geo IP rides
783        // along) and only strip the first OPERATION-declared header.
784        // Slicing past `op.header_params.len()` would otherwise risk
785        // dropping the geo header itself; `op_headers` is built as
786        // `op.header_params ++ geo` so index 0 is always operational.
787        let mut h = op_headers.clone();
788        if !h.is_empty() {
789            h.remove(0);
790        }
791        negatives.push(
792            send_case(
793                client,
794                config,
795                method.clone(),
796                &url,
797                "parameters:missing-header",
798                true,
799                op.sample_body.as_deref(),
800                op.query_params.clone(),
801                h,
802            )
803            .await,
804        );
805    }
806
807    // (w) Round 17.5 — OWASP/WAF unification.
808    //
809    // Pull one canonical payload per OWASP category from the existing
810    // `SecurityPayloads` library and emit an injection probe per
811    // category. Targets in priority order: (1) substitute the first
812    // query param's value, (2) substitute the first string field of
813    // the positive JSON body, (3) skip if neither is available.
814    //
815    // Label format `owasp:<category>`, so the existing
816    // `negative_caught` / `negative_missed` rollup groups all OWASP
817    // findings under one `owasp` bucket. Expected 4xx (server should
818    // reject malicious input). A 5xx is a hard finding (server
819    // crashed on the payload); a 2xx is a soft finding (input passed
820    // through unfiltered — may or may not be a real vuln).
821    //
822    // Bounded: at most one probe per category (7 categories total).
823    // Skips the operation entirely if no injection target is
824    // available — open GET endpoints with no params get zero OWASP
825    // probes, no false signal.
826    for probe in build_owasp_probes(op) {
827        negatives.push(
828            send_case(
829                client,
830                config,
831                method.clone(),
832                &url,
833                &probe.label,
834                true,
835                probe.body.as_deref(),
836                probe.query,
837                // Round 24 (f) — OWASP injection probes must also
838                // carry the geo IP, otherwise a WAF / GEODB rule
839                // tuned to a specific source IP would silently let
840                // them through.
841                op_headers.clone(),
842            )
843            .await,
844        );
845    }
846
847    // Round 25 — response-body shape validation pass. For each capture
848    // this op pushed onto the sink, look up the spec's schema for the
849    // actual response status and validate. Result lands in
850    // `response_schema_error` (Some(message) on failure, None on
851    // pass or no-schema-for-this-status). Runs only when the user
852    // opted in AND capture is on (we need the body).
853    if config.validate_response_schemas {
854        if let (Some(sink), Some(start)) = (config.capture.as_ref(), sink_start) {
855            if !op.response_schemas.is_empty() {
856                if let Ok(mut guard) = sink.lock() {
857                    let end = guard.len();
858                    for i in start..end {
859                        let Some(entry) = guard.get_mut(i) else {
860                            continue;
861                        };
862                        let Some(body) = entry.response_body.as_deref() else {
863                            continue;
864                        };
865                        let Some(schema) = op.response_schemas.get(&entry.response_status) else {
866                            continue;
867                        };
868                        entry.response_schema_error = validate_body_against_schema(body, schema);
869                    }
870                }
871            }
872        }
873    }
874
875    OperationResult {
876        method: op.method.clone(),
877        path: op.path.clone(),
878        positive: Some(positive),
879        negatives,
880    }
881}
882
// NOTE: the paragraph below documents `validate_body_against_schema`
// (defined further down in this file), not the function that follows.
// It is kept as a plain `//` comment so rustdoc does not attach it to
// `embed_payload_in_first_string_field`.
//
// Round 25 — validate a JSON body string against an OpenAPI response
// schema (already converted to a `serde_json::Value`). Returns
// `Some(message)` describing the first violation, or `None` on a
// clean pass / non-JSON body / schema-build failure (in which case
// the absence of an error means "we didn't have anything to compare
// against", not "passed"; the caller-side semantics treat absence as
// success because that's what the user sees as silence).
890/// Round 27 (k variant b) — return a JSON body string identical to
891/// `sample` except that the first string-valued leaf has been
892/// replaced with `snippet`. Walks objects depth-first and stops at
893/// the first string. If `sample` is not parseable JSON, or has no
894/// string fields, falls back to wrapping the snippet under a `data`
895/// key so the probe still has a body to send: `{"data": <snippet>}`.
896/// The result is always valid JSON ready for `application/json`.
897fn embed_payload_in_first_string_field(sample: &str, snippet: &str) -> String {
898    let mut parsed: serde_json::Value = match serde_json::from_str(sample) {
899        Ok(v) => v,
900        Err(_) => return format!(r#"{{"data":{}}}"#, json_quote(snippet)),
901    };
902    if !replace_first_string(&mut parsed, snippet) {
903        return format!(r#"{{"data":{}}}"#, json_quote(snippet));
904    }
905    serde_json::to_string(&parsed)
906        .unwrap_or_else(|_| format!(r#"{{"data":{}}}"#, json_quote(snippet)))
907}
908
909/// Helper for `embed_payload_in_first_string_field`: recursively
910/// walk the value and replace the FIRST string leaf encountered.
911/// Returns true when a replacement happened. Honors document order
912/// for objects (BTreeMap-backed `serde_json::Map` iterates in
913/// insertion order) so the choice of which field to mutate is
914/// stable across runs.
915fn replace_first_string(v: &mut serde_json::Value, snippet: &str) -> bool {
916    match v {
917        serde_json::Value::String(s) => {
918            *s = snippet.to_string();
919            true
920        }
921        serde_json::Value::Object(map) => {
922            for (_k, child) in map.iter_mut() {
923                if replace_first_string(child, snippet) {
924                    return true;
925                }
926            }
927            false
928        }
929        serde_json::Value::Array(arr) => {
930            for child in arr.iter_mut() {
931                if replace_first_string(child, snippet) {
932                    return true;
933                }
934            }
935            false
936        }
937        _ => false,
938    }
939}
940
941/// Helper for `embed_payload_in_first_string_field`'s fallback: take
942/// an arbitrary string and quote it for embedding inside a JSON
943/// literal. `serde_json::to_string(&value)` handles escaping
944/// correctly for unicode + control chars + quotes.
945fn json_quote(s: &str) -> String {
946    serde_json::to_string(s).unwrap_or_else(|_| "\"\"".to_string())
947}
948
949fn validate_body_against_schema(body: &str, schema: &serde_json::Value) -> Option<String> {
950    let parsed: serde_json::Value = serde_json::from_str(body).ok()?;
951    let validator = jsonschema::validator_for(schema).ok()?;
952    let mut errors = validator.iter_errors(&parsed);
953    let first = errors.next()?;
954    // Round 26 — Srikanth on 0.3.169: the prior `format!("{:?}", first.kind)
955    // .split('(').next()` produced "Type { kind: Single" (broken Rust
956    // syntax, mismatched braces). Switch to the human-readable mapping
957    // already used in executor.rs: handle the common kinds (Type,
958    // Required, AdditionalProperties, Enum, MinLength, MaxLength,
959    // Minimum, Maximum, Pattern) explicitly; fall back to the
960    // jsonschema crate's Display impl on the error (which produces
961    // something like "{...} is not of type \"string\"") for the long
962    // tail. Combined with `at <instance-path>` for the field location.
963    let path = first.instance_path.to_string();
964    let path = if path.is_empty() { "/" } else { path.as_str() };
965    let kind_msg: String = match &first.kind {
966        jsonschema::error::ValidationErrorKind::Type { kind } => {
967            // `kind` is `TypeKind::Single(JsonType)` or
968            // `TypeKind::Multiple(JsonTypeSet)`. `JsonType` has its
969            // own `Display` impl ("string", "object", etc.).
970            match kind {
971                jsonschema::error::TypeKind::Single(t) => format!("expected type {t}"),
972                jsonschema::error::TypeKind::Multiple(_) => "expected one of multiple types".into(),
973            }
974        }
975        jsonschema::error::ValidationErrorKind::Required { property } => {
976            format!("required field missing: {property}")
977        }
978        jsonschema::error::ValidationErrorKind::AdditionalProperties { unexpected } => {
979            format!("unexpected additional properties: {unexpected:?}")
980        }
981        jsonschema::error::ValidationErrorKind::Enum { options } => {
982            format!("value not in allowed enum: {options}")
983        }
984        jsonschema::error::ValidationErrorKind::MinLength { limit } => {
985            format!("string shorter than min length ({limit})")
986        }
987        jsonschema::error::ValidationErrorKind::MaxLength { limit } => {
988            format!("string longer than max length ({limit})")
989        }
990        jsonschema::error::ValidationErrorKind::Minimum { limit } => {
991            format!("value below minimum ({limit})")
992        }
993        jsonschema::error::ValidationErrorKind::Maximum { limit } => {
994            format!("value above maximum ({limit})")
995        }
996        jsonschema::error::ValidationErrorKind::Pattern { pattern } => {
997            format!("value did not match pattern {pattern}")
998        }
999        // Long tail: lean on jsonschema's Display impl, which is the
1000        // built-in human-readable error message ("X is not of type Y").
1001        // Strip trailing newlines so the JSONL line stays one line.
1002        _ => first.to_string().trim().to_string(),
1003    };
1004    Some(format!("at {path}: {kind_msg}"))
1005}
1006
/// Round 17.5 — one OWASP injection probe to send.
///
/// Built by `build_owasp_probes`; the fields are handed to
/// `send_case` as the probe's label, body and query parameters.
#[derive(Debug, Clone)]
struct OwaspProbe {
    /// Self-test label, formatted as `owasp:<category>`.
    label: String,
    /// Request body for the probe: the operation's sample body, with
    /// the payload injected when the target is a body field.
    body: Option<String>,
    /// Query parameters for the probe: the operation's sample query,
    /// with the payload substituted when the target is a query param.
    query: Vec<(String, String)>,
}
1014
1015/// Build one OWASP probe per `SecurityCategory` for `op`. Targets the
1016/// first query param if any, else the first string field of the
1017/// positive JSON body. Returns empty if neither target is available.
1018fn build_owasp_probes(op: &AnnotatedOperation) -> Vec<OwaspProbe> {
1019    use crate::security_payloads::{SecurityCategory, SecurityPayloads};
1020
1021    let categories = [
1022        SecurityCategory::SqlInjection,
1023        SecurityCategory::Xss,
1024        SecurityCategory::CommandInjection,
1025        SecurityCategory::PathTraversal,
1026        SecurityCategory::Ssti,
1027        SecurityCategory::LdapInjection,
1028        SecurityCategory::Xxe,
1029    ];
1030
1031    // Pick an injection target ONCE per operation; reuse it across
1032    // categories. (A single op gets up to 7 probes — one per category
1033    // — all attacking the same field.)
1034    let injection_target = pick_injection_target(op);
1035    let Some(target) = injection_target else {
1036        return Vec::new();
1037    };
1038
1039    let mut probes = Vec::new();
1040    for cat in categories {
1041        // Take the *first* payload from each category. The
1042        // collection's first entry is the canonical low-risk
1043        // representative; later entries include time-based / blind
1044        // probes that aren't useful as a one-shot rejection test.
1045        let Some(payload) = SecurityPayloads::get_by_category(cat).into_iter().next() else {
1046            continue;
1047        };
1048        let mut query = op.query_params.clone();
1049        let mut body = op.sample_body.clone();
1050        match &target {
1051            InjectionTarget::Query(idx) => {
1052                if let Some(slot) = query.get_mut(*idx) {
1053                    slot.1 = payload.payload.clone();
1054                }
1055            }
1056            InjectionTarget::BodyStringField(field) => {
1057                body = inject_into_body_field(body.as_deref(), field, &payload.payload);
1058            }
1059        }
1060        probes.push(OwaspProbe {
1061            label: format!("owasp:{}", cat),
1062            body,
1063            query,
1064        });
1065    }
1066    probes
1067}
1068
/// Where `build_owasp_probes` places the injection payload; chosen
/// by `pick_injection_target`.
#[derive(Debug, Clone)]
enum InjectionTarget {
    /// Index into the operation's `query_params` whose value is
    /// overwritten with the payload.
    Query(usize),
    /// Name of a top-level string field of the JSON sample body whose
    /// value is overwritten with the payload.
    BodyStringField(String),
}
1074
1075fn pick_injection_target(op: &AnnotatedOperation) -> Option<InjectionTarget> {
1076    if !op.query_params.is_empty() {
1077        return Some(InjectionTarget::Query(0));
1078    }
1079    let sample = op.sample_body.as_deref()?;
1080    let parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
1081    let obj = parsed.as_object()?;
1082    for (k, v) in obj {
1083        if v.is_string() {
1084            return Some(InjectionTarget::BodyStringField(k.clone()));
1085        }
1086    }
1087    None
1088}
1089
1090/// Replace the value of `field` in a JSON-object body with `payload`.
1091/// Returns the mutated body as a JSON string. Returns `None` if the
1092/// body doesn't parse as a JSON object.
1093fn inject_into_body_field(body: Option<&str>, field: &str, payload: &str) -> Option<String> {
1094    let raw = body?;
1095    let mut parsed: serde_json::Value = serde_json::from_str(raw).ok()?;
1096    let obj = parsed.as_object_mut()?;
1097    obj.insert(field.to_string(), serde_json::json!(payload));
1098    serde_json::to_string(&parsed).ok()
1099}
1100
/// Round 17.3 — one synthesised bad credential to send.
///
/// Produced by `build_security_probes`; `headers` and `query` carry
/// the deliberately-invalid credential (both are empty for the
/// "no auth at all" probe).
#[derive(Debug, Clone)]
struct SecurityProbe {
    /// Self-test label, e.g. `security:bad-bearer`.
    label: String,
    /// Headers to attach to the probe request.
    headers: Vec<(String, String)>,
    /// Query parameters to attach (API key in query case).
    query: Vec<(String, String)>,
}
1112
1113/// For each declared security scheme, produce one bad-credential
1114/// probe plus a single "no auth at all" probe that exercises the
1115/// missing-credential code path. Deduplicates by scheme kind so an
1116/// operation declaring `[bearer, bearer]` only yields one Bearer
1117/// probe.
1118fn build_security_probes(schemes: &[SecuritySchemeInfo]) -> Vec<SecurityProbe> {
1119    if schemes.is_empty() {
1120        return Vec::new();
1121    }
1122    let mut probes: Vec<SecurityProbe> = Vec::new();
1123    let mut seen_bearer = false;
1124    let mut seen_basic = false;
1125    // `(loc_tag, name)` — ApiKeyLocation doesn't implement Ord, so
1126    // we tag it with a short discriminant string for dedup.
1127    let mut seen_apikey: std::collections::BTreeSet<(&'static str, String)> = Default::default();
1128    for s in schemes {
1129        match s {
1130            SecuritySchemeInfo::Bearer if !seen_bearer => {
1131                seen_bearer = true;
1132                probes.push(SecurityProbe {
1133                    label: "security:bad-bearer".into(),
1134                    headers: vec![(
1135                        "Authorization".into(),
1136                        "Bearer self-test-invalid-token".into(),
1137                    )],
1138                    query: Vec::new(),
1139                });
1140            }
1141            SecuritySchemeInfo::Basic if !seen_basic => {
1142                seen_basic = true;
1143                // base64("self-test:invalid") — valid base64, wrong creds.
1144                probes.push(SecurityProbe {
1145                    label: "security:bad-basic".into(),
1146                    headers: vec![(
1147                        "Authorization".into(),
1148                        "Basic c2VsZi10ZXN0OmludmFsaWQ=".into(),
1149                    )],
1150                    query: Vec::new(),
1151                });
1152            }
1153            SecuritySchemeInfo::ApiKey { location, name } => {
1154                let loc_tag = match location {
1155                    ApiKeyLocation::Header => "header",
1156                    ApiKeyLocation::Query => "query",
1157                    ApiKeyLocation::Cookie => "cookie",
1158                };
1159                if seen_apikey.contains(&(loc_tag, name.clone())) {
1160                    continue;
1161                }
1162                seen_apikey.insert((loc_tag, name.clone()));
1163                let label = format!("security:bad-apikey:{}", name);
1164                let bad = "self-test-invalid-key".to_string();
1165                match location {
1166                    ApiKeyLocation::Header => probes.push(SecurityProbe {
1167                        label,
1168                        headers: vec![(name.clone(), bad)],
1169                        query: Vec::new(),
1170                    }),
1171                    ApiKeyLocation::Query => probes.push(SecurityProbe {
1172                        label,
1173                        headers: Vec::new(),
1174                        query: vec![(name.clone(), bad)],
1175                    }),
1176                    ApiKeyLocation::Cookie => probes.push(SecurityProbe {
1177                        label,
1178                        headers: vec![("Cookie".into(), format!("{}={}", name, bad))],
1179                        query: Vec::new(),
1180                    }),
1181                }
1182            }
1183            _ => {}
1184        }
1185    }
1186    // Always add a "no auth at all" probe when *any* security scheme
1187    // is declared — useful even if all schemes failed to resolve to a
1188    // testable kind, because it surfaces validators that aren't
1189    // checking auth presence at all.
1190    probes.push(SecurityProbe {
1191        label: "security:no-auth".into(),
1192        headers: Vec::new(),
1193        query: Vec::new(),
1194    });
1195    probes
1196}
1197
1198/// Remove Authorization and any API-key headers declared by the
1199/// operation's security schemes from `headers`, so a security probe
1200/// can supply its own credential (or none) cleanly.
1201fn strip_auth(
1202    headers: &[(String, String)],
1203    schemes: &[SecuritySchemeInfo],
1204) -> Vec<(String, String)> {
1205    let mut apikey_headers: std::collections::BTreeSet<String> = Default::default();
1206    for s in schemes {
1207        if let SecuritySchemeInfo::ApiKey {
1208            location: ApiKeyLocation::Header,
1209            name,
1210        } = s
1211        {
1212            apikey_headers.insert(name.to_lowercase());
1213        }
1214        if let SecuritySchemeInfo::ApiKey {
1215            location: ApiKeyLocation::Cookie,
1216            ..
1217        } = s
1218        {
1219            apikey_headers.insert("cookie".into());
1220        }
1221    }
1222    headers
1223        .iter()
1224        .filter(|(k, _)| {
1225            let lk = k.to_lowercase();
1226            lk != "authorization" && !apikey_headers.contains(&lk)
1227        })
1228        .cloned()
1229        .collect()
1230}
1231
1232/// Remove API-key query parameters declared by the operation's
1233/// security schemes from `query`, so a probe can supply its own.
1234fn strip_auth_query(
1235    query: &[(String, String)],
1236    schemes: &[SecuritySchemeInfo],
1237) -> Vec<(String, String)> {
1238    let mut apikey_query: std::collections::BTreeSet<String> = Default::default();
1239    for s in schemes {
1240        if let SecuritySchemeInfo::ApiKey {
1241            location: ApiKeyLocation::Query,
1242            name,
1243        } = s
1244        {
1245            apikey_query.insert(name.clone());
1246        }
1247    }
1248    query.iter().filter(|(k, _)| !apikey_query.contains(k)).cloned().collect()
1249}
1250
1251/// Variant of `send_case` that takes an explicit `extra_headers`
1252/// (rather than reading them from `config`). Used by security probes
1253/// to substitute or strip the configured Authorization header.
1254#[allow(clippy::too_many_arguments)]
1255async fn send_case_with_extra(
1256    client: &Client,
1257    config: &SelfTestConfig,
1258    method: Method,
1259    url: &str,
1260    label: &str,
1261    expected_4xx: bool,
1262    body: Option<&str>,
1263    query: Vec<(String, String)>,
1264    headers: Vec<(String, String)>,
1265    extra_headers: Vec<(String, String)>,
1266) -> CaseOutcome {
1267    let mut req = client.request(method.clone(), url);
1268    let mut capture_headers: BTreeMap<String, String> = BTreeMap::new();
1269    for (k, v) in &query {
1270        req = req.query(&[(k.as_str(), v.as_str())]);
1271    }
1272    // Attach the body FIRST with a default Content-Type. Subsequent
1273    // header passes (the operation's headers, then extra_headers) can
1274    // overwrite the Content-Type — that's what makes the round-25 (k)
1275    // content-type-swap probes work: they pass a wrong Content-Type
1276    // via extra_headers and reqwest's last-write-wins keeps it.
1277    if let Some(b) = body {
1278        req = req
1279            .header(reqwest::header::CONTENT_TYPE, "application/json")
1280            .body(b.to_string());
1281        capture_headers.insert("Content-Type".to_string(), "application/json".to_string());
1282    }
1283    for (k, v) in &headers {
1284        req = req.header(k, v);
1285        capture_headers.insert(k.clone(), v.clone());
1286    }
1287    for (k, v) in &extra_headers {
1288        req = req.header(k, v);
1289        capture_headers.insert(k.clone(), v.clone());
1290    }
1291    let (actual_status, response_capture) = match req.send().await {
1292        Ok(resp) => {
1293            let status = resp.status().as_u16();
1294            if let Some(sink) = &config.capture {
1295                let resp_headers: BTreeMap<String, String> = resp
1296                    .headers()
1297                    .iter()
1298                    .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string()))
1299                    .collect();
1300                let text = resp.text().await.unwrap_or_default();
1301                let (rb, truncated) = truncate_body_for_capture(&text);
1302                (status, Some((Some((rb, truncated)), resp_headers, None, sink.clone())))
1303            } else {
1304                (status, None)
1305            }
1306        }
1307        Err(e) => {
1308            let err_str = e.to_string();
1309            if let Some(sink) = &config.capture {
1310                (0, Some((None, BTreeMap::new(), Some(err_str), sink.clone())))
1311            } else {
1312                (0, None)
1313            }
1314        }
1315    };
1316    let passed = if expected_4xx {
1317        (400..500).contains(&actual_status)
1318    } else {
1319        (200..400).contains(&actual_status)
1320    };
1321    if let Some((resp_body, resp_headers, error, sink)) = response_capture {
1322        let (request_body, request_body_truncated) = match body {
1323            Some(b) => {
1324                let (rb, t) = truncate_body_for_capture(b);
1325                (Some(rb), t)
1326            }
1327            None => (None, false),
1328        };
1329        let (response_body, response_body_truncated) = match resp_body {
1330            Some((rb, t)) => (Some(rb), t),
1331            None => (None, false),
1332        };
1333        let entry = CaseCapture {
1334            label: label.to_string(),
1335            method: method.to_string(),
1336            url: build_query_url(url, &query),
1337            request_headers: capture_headers,
1338            request_body,
1339            request_body_truncated,
1340            response_status: actual_status,
1341            response_headers: resp_headers,
1342            response_body,
1343            response_body_truncated,
1344            error,
1345            // Filled in by the per-operation validation pass after
1346            // every probe finishes; the capture itself is unaware of
1347            // the schema map.
1348            response_schema_error: None,
1349        };
1350        if let Ok(mut guard) = sink.lock() {
1351            guard.push(entry);
1352        }
1353    }
1354    CaseOutcome {
1355        label: label.to_string(),
1356        expected_4xx,
1357        actual_status,
1358        passed,
1359    }
1360}
1361
1362// HTTP request shape needs all of these: client, config (for capture
1363// sink + extra headers), method, url, label (probe id), expected_4xx
1364// (pass/fail decision), body, query, headers. A struct wrapper would
1365// just move the arity from positional to field access without making
1366// the call sites clearer.
1367#[allow(clippy::too_many_arguments)]
1368async fn send_case(
1369    client: &Client,
1370    config: &SelfTestConfig,
1371    method: Method,
1372    url: &str,
1373    label: &str,
1374    expected_4xx: bool,
1375    body: Option<&str>,
1376    query: Vec<(String, String)>,
1377    headers: Vec<(String, String)>,
1378) -> CaseOutcome {
1379    // Forwarding to `send_case_with_extra` keeps the capture logic in
1380    // one place so request/response tracing can't drift between the
1381    // two entrypoints.
1382    send_case_with_extra(
1383        client,
1384        config,
1385        method,
1386        url,
1387        label,
1388        expected_4xx,
1389        body,
1390        query,
1391        headers,
1392        config.extra_headers.clone(),
1393    )
1394    .await
1395}
1396
1397/// Round 23 (c-iii) — rebuild the query-stringified URL for capture so
1398/// the JSONL trace shows the URL that actually went over the wire
1399/// (reqwest applies `.query(..)` after the request URL string is
1400/// rendered, so capturing the raw `url` argument alone loses the
1401/// query params).
1402fn build_query_url(base: &str, query: &[(String, String)]) -> String {
1403    if query.is_empty() {
1404        return base.to_string();
1405    }
1406    let qs: String = query
1407        .iter()
1408        .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
1409        .collect::<Vec<_>>()
1410        .join("&");
1411    if base.contains('?') {
1412        format!("{base}&{qs}")
1413    } else {
1414        format!("{base}?{qs}")
1415    }
1416}
1417
1418/// Substitute `{param}` placeholders in the spec path with their
1419/// sample values from `path_params`, then prepend `target_url`. Empty
1420/// values are kept as `{param}` so an upstream router still matches
1421/// the template — useful when `path_params` is empty and we want to
1422/// hit the same route the spec defines.
1423///
1424/// All current call sites went through `build_url_with_base` after
1425/// round 18.1, so this no-base-path helper is unused; keep it as the
1426/// documented shim for future external callers (one-arg simplification).
1427#[allow(dead_code)]
1428fn build_url(target: &str, path_template: &str, path_params: &[(String, String)]) -> String {
1429    build_url_with_base(target, None, path_template, path_params)
1430}
1431
/// Round 18.1 — variant of `build_url` that takes a `base_path`
/// (e.g. `Some("/api")`). When set, prepends it to the spec path so a
/// spec declaring `/users` against a target served behind `/api`
/// resolves to `<target>/api/users`.
///
/// * `target` — scheme + host; trailing `/` characters are stripped.
/// * `base_path` — normalised: trailing `/` is stripped and a leading
///   `/` is added if missing. `None`, `""`, and values consisting only
///   of slashes (e.g. `"/"`) all mean "no prefix".
/// * `path_template` — spec path; a leading `/` is added if missing.
/// * `path_params` — `(name, value)` pairs substituted for `{name}`.
///   Pairs with an empty value are skipped, leaving `{name}` in place.
fn build_url_with_base(
    target: &str,
    base_path: Option<&str>,
    path_template: &str,
    path_params: &[(String, String)],
) -> String {
    let mut path = path_template.to_string();
    for (name, value) in path_params {
        if !value.is_empty() {
            path = path.replace(&format!("{{{name}}}"), value);
        }
    }
    if !path.starts_with('/') {
        path.insert(0, '/');
    }

    // Filter AFTER trimming: a base path of "/" trims to "" and must
    // not be turned back into "/" (which would yield `target//path`).
    let prefix = base_path
        .map(|bp| bp.trim_end_matches('/'))
        .filter(|bp| !bp.is_empty())
        .map(|bp| {
            if bp.starts_with('/') {
                bp.to_string()
            } else {
                format!("/{bp}")
            }
        })
        .unwrap_or_default();

    let target = target.trim_end_matches('/');
    format!("{target}{prefix}{path}")
}
1469
#[cfg(test)]
mod tests {
    use super::*;

    /// Loopback address on a port the tests treat as dead: requests
    /// are expected to fail with a connect error / timeout rather than
    /// reach a live server.
    const DEAD_TARGET: &str = "http://127.0.0.1:1";

    /// Baseline config pointed at [`DEAD_TARGET`] with the given
    /// per-request timeout. Tests needing extra fields layer them on
    /// with struct-update syntax: `SelfTestConfig { x, ..dead_target_cfg(50) }`.
    fn dead_target_cfg(timeout_ms: u64) -> SelfTestConfig {
        SelfTestConfig {
            target_url: DEAD_TARGET.into(),
            timeout: Duration::from_millis(timeout_ms),
            ..Default::default()
        }
    }

    /// Fresh, empty capture sink to hand to `SelfTestConfig::capture`.
    fn capture_sink() -> Arc<Mutex<Vec<CaseCapture>>> {
        Arc::new(Mutex::new(Vec::new()))
    }

    /// Value of the captured request's `Content-Type` header (name
    /// matched case-insensitively), or `""` when the probe sent none.
    fn content_type_of(capture: &CaseCapture) -> &str {
        capture
            .request_headers
            .iter()
            .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
            .map(|(_, v)| v.as_str())
            .unwrap_or("")
    }

    /// Build a minimal `AnnotatedOperation`. A `Some` body also sets
    /// the request content type to `application/json`; schemas and
    /// security schemes start out empty.
    fn op(
        method: &str,
        path: &str,
        body: Option<&str>,
        query: Vec<(&str, &str)>,
        headers: Vec<(&str, &str)>,
        path_params: Vec<(&str, &str)>,
    ) -> AnnotatedOperation {
        AnnotatedOperation {
            method: method.into(),
            path: path.into(),
            features: Vec::new(),
            request_body_content_type: body.map(|_| "application/json".into()),
            sample_body: body.map(|s| s.to_string()),
            query_params: query.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
            header_params: headers.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
            path_params: path_params.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
            response_schema: None,
            response_schemas: BTreeMap::new(),
            request_body_schema: None,
            security_schemes: Vec::new(),
        }
    }

    #[test]
    fn build_url_substitutes_path_params() {
        let url = build_url(
            "https://api.test/",
            "/users/{id}/posts/{pid}",
            &[("id".into(), "42".into()), ("pid".into(), "7".into())],
        );
        assert_eq!(url, "https://api.test/users/42/posts/7");
    }

    /// Round 18.1 — a run where every positive 404s should be flagged
    /// as a likely target misconfiguration, not silently treated as a
    /// successful conformance run.
    #[test]
    fn detect_target_misconfiguration_when_all_positives_share_status() {
        let mut report = SelfTestReport {
            positive_pass: 0,
            positive_fail: 50,
            ..Default::default()
        };
        for i in 0..50 {
            report.operations.push(OperationResult {
                method: "GET".into(),
                path: format!("/r/{i}"),
                positive: Some(CaseOutcome {
                    label: "positive".into(),
                    expected_4xx: false,
                    actual_status: 404,
                    passed: false,
                }),
                negatives: Vec::new(),
            });
        }
        assert_eq!(report.detect_target_misconfiguration(), Some(404));
    }

    /// Counterpart of the test above: as soon as some positives pass,
    /// the shared-404 heuristic must stay quiet.
    #[test]
    fn detect_target_misconfiguration_returns_none_when_some_pass() {
        let mut report = SelfTestReport {
            positive_pass: 5,
            positive_fail: 50,
            ..Default::default()
        };
        for i in 0..55 {
            report.operations.push(OperationResult {
                method: "GET".into(),
                path: format!("/r/{i}"),
                positive: Some(CaseOutcome {
                    label: "positive".into(),
                    expected_4xx: false,
                    actual_status: if i < 5 { 200 } else { 404 },
                    passed: i < 5,
                }),
                negatives: Vec::new(),
            });
        }
        assert_eq!(report.detect_target_misconfiguration(), None);
    }

    /// Round 18.1 — `--base-path /api` should prepend `/api` to
    /// every spec path. Pre-fix, the self-test ignored base_path and
    /// 404'd every positive when the deployed API was behind a path
    /// prefix.
    #[test]
    fn build_url_applies_base_path_when_present() {
        let url = build_url_with_base(
            "https://api.example.com",
            Some("/api"),
            "/users/{id}",
            &[("id".into(), "42".into())],
        );
        assert_eq!(url, "https://api.example.com/api/users/42");
    }

    /// Round 18.1 — base_path is normalised: missing leading slash
    /// gets one added, trailing slash is stripped, empty string is
    /// the same as None.
    #[test]
    fn build_url_normalises_base_path() {
        let no_slash = build_url_with_base("https://t", Some("api"), "/x", &[]);
        assert_eq!(no_slash, "https://t/api/x");
        let trailing = build_url_with_base("https://t", Some("/api/"), "/x", &[]);
        assert_eq!(trailing, "https://t/api/x");
        let empty = build_url_with_base("https://t", Some(""), "/x", &[]);
        assert_eq!(empty, "https://t/x");
        let none = build_url_with_base("https://t", None, "/x", &[]);
        assert_eq!(none, "https://t/x");
    }

    #[test]
    fn build_url_keeps_placeholders_when_no_sample() {
        let url = build_url("https://api.test", "/users/{id}", &[]);
        assert_eq!(url, "https://api.test/users/{id}");
    }

    #[test]
    fn report_summary_calls_out_misses() {
        let r = SelfTestReport {
            positive_pass: 3,
            positive_fail: 0,
            negative_caught: BTreeMap::from([("request-body".into(), 2)]),
            negative_missed: BTreeMap::from([("request-body".into(), 1)]),
            operations: Vec::new(),
        };
        let summary = r.render_summary();
        assert!(summary.contains("Positives: 3 pass / 0 fail"));
        assert!(summary.contains("Negatives [request-body]: 2 caught / 1 missed"));
        assert!(summary.contains("⚠"));
        assert!(!r.all_passed());
    }

    #[test]
    fn report_all_passed_when_no_miss() {
        let r = SelfTestReport {
            positive_pass: 5,
            positive_fail: 0,
            negative_caught: BTreeMap::from([("parameters".into(), 3)]),
            negative_missed: BTreeMap::new(),
            operations: Vec::new(),
        };
        assert!(r.all_passed());
        assert!(r.render_summary().contains("✓"));
    }

    #[tokio::test]
    async fn run_self_test_against_unreachable_target_marks_all_failed() {
        // Use an obviously-dead port so we exercise the timeout/error
        // path without needing a live server in tests.
        let cfg = dead_target_cfg(200);
        let ops = vec![op(
            "POST",
            "/users",
            Some("{\"name\":\"a\"}"),
            vec![],
            vec![],
            vec![],
        )];
        let report = run_self_test(&ops, &cfg).await.expect("client builds");
        // All cases hit the connect-error path → actual_status=0.
        // Positive expects 2xx-3xx → 0 is fail. Negatives expect 4xx
        // → 0 is also fail (we missed catching).
        assert_eq!(report.positive_fail, 1);
        assert!(report.negative_missed.values().sum::<usize>() >= 1);
        assert!(!report.all_passed());
    }

    /// Round 17.2 — operations with both a positive sample AND a
    /// resolved request-body schema produce schema-driven negatives
    /// in addition to the spec-agnostic empty/wrong-type ones. The
    /// labels carry the field path so a per-category report can tell
    /// you exactly which field caught.
    #[tokio::test]
    async fn schema_driven_negatives_fire_when_schema_present() {
        use openapiv3::{ObjectType, ReferenceOr, Schema, SchemaData, SchemaKind, Type};
        let cfg = dead_target_cfg(200);
        // Build an operation whose schema has a required `name` string
        // and an `age` integer. The mutator should produce, at
        // minimum: required-removed:name, required-removed:age,
        // type-mismatch:name, type-mismatch:age, integer-as-float:age,
        // plus the root-level type-mismatch.
        let mut obj = ObjectType::default();
        obj.properties.insert(
            "name".to_string(),
            ReferenceOr::Item(Box::new(Schema {
                schema_data: SchemaData::default(),
                schema_kind: SchemaKind::Type(Type::String(Default::default())),
            })),
        );
        obj.properties.insert(
            "age".to_string(),
            ReferenceOr::Item(Box::new(Schema {
                schema_data: SchemaData::default(),
                schema_kind: SchemaKind::Type(Type::Integer(Default::default())),
            })),
        );
        obj.required = vec!["name".into(), "age".into()];
        let schema = Schema {
            schema_data: SchemaData::default(),
            schema_kind: SchemaKind::Type(Type::Object(obj)),
        };

        let mut o =
            op("POST", "/users", Some(r#"{"name":"Ada","age":30}"#), vec![], vec![], vec![]);
        o.request_body_schema = Some(schema);
        let report = run_self_test(&[o], &cfg).await.expect("client builds");
        // Bucket labels from the operation result.
        let labels: std::collections::BTreeSet<String> = report
            .operations
            .iter()
            .flat_map(|op| op.negatives.iter().map(|n| n.label.clone()))
            .collect();
        assert!(
            labels.iter().any(|l| l.starts_with("request-body:type-mismatch:")),
            "missing type-mismatch negative; got {labels:?}"
        );
        assert!(
            labels.iter().any(|l| l.starts_with("request-body:required-removed:")),
            "missing required-removed negative; got {labels:?}"
        );
        assert!(
            labels.iter().any(|l| l == "parameters:uri-too-long"),
            "missing URI-length negative; got {labels:?}"
        );
    }

    /// Round 16 — operations with a body OR a path-param now produce
    /// negatives even without a sample body. Previously a POST whose
    /// body annotator failed produced *zero* negatives, so the self-test
    /// always reported "all passing" for that endpoint.
    #[tokio::test]
    async fn no_sample_body_still_produces_request_body_negatives() {
        let cfg = dead_target_cfg(200);
        // POST with a body content type but no sample (annotator gap):
        // `op(.., None, ..)` leaves both unset, so the content type is
        // filled in by hand afterwards.
        let mut ops = vec![op("POST", "/x", None, vec![], vec![], vec![])];
        ops[0].request_body_content_type = Some("application/json".into());
        let report = run_self_test(&ops, &cfg).await.expect("client builds");
        // Both request-body negatives (empty + wrong-type) should fire,
        // landing in `negative_missed` because the unreachable target
        // returns no 4xx. The point: count > 0.
        assert!(
            report.negative_missed.values().sum::<usize>() >= 2,
            "expected ≥2 request-body negatives, got {:?}",
            report.negative_missed
        );
    }

    /// Round 16 — operations with a path-param now get a probe even
    /// when there's no body / required query / required header.
    /// Previously `/teams/{team-id}` with no other required fields
    /// produced zero negatives → always "all passing".
    #[tokio::test]
    async fn path_param_only_endpoint_produces_a_probe() {
        let cfg = dead_target_cfg(200);
        let ops = vec![op(
            "GET",
            "/teams/{team-id}",
            None,
            vec![],
            vec![],
            vec![("team-id", "1")],
        )];
        let report = run_self_test(&ops, &cfg).await.expect("client builds");
        let total: usize = report.negative_caught.values().sum::<usize>()
            + report.negative_missed.values().sum::<usize>();
        assert!(total >= 1, "expected ≥1 path-param probe, got {:?}", report);
    }

    /// Round 18.5 — when `geo_ip` is set, every default forwarded-
    /// IP header gets the IP appended (X-Forwarded-For,
    /// True-Client-IP, CF-Connecting-IP).
    #[test]
    fn effective_op_headers_appends_geo_ip_to_default_headers() {
        let ip: IpAddr = "203.0.113.42".parse().unwrap();
        let headers = effective_op_headers(
            &[("Accept".into(), "application/json".into())],
            Some(ip),
            &default_geo_source_headers(),
        );
        let names: Vec<&str> = headers.iter().map(|(k, _)| k.as_str()).collect();
        assert!(names.contains(&"Accept"));
        assert!(names.contains(&"X-Forwarded-For"));
        assert!(names.contains(&"True-Client-IP"));
        assert!(names.contains(&"CF-Connecting-IP"));
        // Every geo header carries the same IP value.
        let geo_values: Vec<&str> =
            headers.iter().filter(|(k, _)| k != "Accept").map(|(_, v)| v.as_str()).collect();
        for v in geo_values {
            assert_eq!(v, "203.0.113.42");
        }
    }

    /// Round 18.5 — operations that already declare a forwarded-IP
    /// header (rare but legal — some specs hard-code one) keep their
    /// declared value; we don't clobber the spec.
    #[test]
    fn effective_op_headers_respects_spec_declared_header() {
        let ip: IpAddr = "203.0.113.99".parse().unwrap();
        let headers = effective_op_headers(
            &[("x-forwarded-for".into(), "10.0.0.1".into())],
            Some(ip),
            &["X-Forwarded-For".to_string()],
        );
        // The spec's lower-case value wins; we shouldn't add a
        // second X-Forwarded-For row that overrides it.
        let xff: Vec<&str> = headers
            .iter()
            .filter(|(k, _)| k.eq_ignore_ascii_case("x-forwarded-for"))
            .map(|(_, v)| v.as_str())
            .collect();
        assert_eq!(xff, vec!["10.0.0.1"]);
    }

    /// Round 18.5 — None geo_ip and/or empty header list is a no-op.
    #[test]
    fn effective_op_headers_is_a_noop_without_geo_ip() {
        let base = vec![("Accept".into(), "json".into())];
        let h1 = effective_op_headers(&base, None, &default_geo_source_headers());
        assert_eq!(h1, base);
        let ip: IpAddr = "10.0.0.1".parse().unwrap();
        let h2 = effective_op_headers(&base, Some(ip), &[]);
        assert_eq!(h2, base);
    }

    /// Round 18.5 — empty `source_ips` builds a single default
    /// client; a non-empty list builds N clients each attempting to
    /// bind. We can't reliably test the actual bind on CI (no
    /// loopback aliases), but a loopback IP is always bind-able.
    #[test]
    fn build_client_pool_one_per_source_ip() {
        let mut cfg = dead_target_cfg(200);
        // Empty → one default client.
        assert_eq!(build_client_pool(&cfg).expect("default builds").len(), 1);
        // Non-empty → one per IP. Loopback bind is portable.
        cfg.source_ips = vec!["127.0.0.1".parse().unwrap()];
        assert_eq!(build_client_pool(&cfg).expect("bind loopback").len(), 1);
    }

    /// Round 18.5 — geo IPs round-robin across operations. Hits an
    /// unreachable target so we can inspect the case outcomes; the
    /// point is to confirm `op_headers` carried the geo IP through
    /// (CaseOutcome doesn't surface headers directly, so we just
    /// verify the run completes without panicking and the result
    /// shape is correct when source_ips is non-empty too).
    #[tokio::test]
    async fn run_self_test_with_geo_source_completes() {
        let cfg = SelfTestConfig {
            geo_source_ips: vec![
                "203.0.113.1".parse().unwrap(),
                "203.0.113.2".parse().unwrap(),
            ],
            ..dead_target_cfg(200)
        };
        let ops = vec![
            op("GET", "/a", None, vec![], vec![], vec![]),
            op("GET", "/b", None, vec![], vec![], vec![]),
            op("GET", "/c", None, vec![], vec![], vec![]),
        ];
        let report = run_self_test(&ops, &cfg).await.expect("client builds");
        assert_eq!(report.operations.len(), 3);
    }

    /// Round 24 (f) — Srikanth saw the geo header on positive probes
    /// only; the four negative-probe call sites were passing
    /// `op.header_params` directly instead of `op_headers`, so the
    /// geo IP got dropped. This test runs a self-test that includes
    /// negative probes (uri-too-long, missing-query, etc.) under
    /// `--conformance-self-test-capture`, then asserts that EVERY
    /// captured probe (positive AND negative) carries one of the
    /// configured forwarded-IP headers.
    #[tokio::test]
    async fn geo_headers_present_on_every_probe_with_capture() {
        let sink = capture_sink();
        let cfg = SelfTestConfig {
            geo_source_ips: vec!["203.0.113.5".parse().unwrap()],
            capture: Some(Arc::clone(&sink)),
            ..dead_target_cfg(50)
        };
        // An operation rich enough to trip several negative-probe
        // branches: header param (→ missing-header), query param
        // (→ missing-query), and a sample body (→ schema mutations
        // wouldn't fire without a schema, but uri-too-long always
        // does).
        let ops = vec![op(
            "GET",
            "/items",
            Some("{}"),
            vec![("id", "1")],
            vec![("X-Trace", "x")],
            vec![],
        )];
        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
        let captures = sink.lock().unwrap();
        assert!(!captures.is_empty(), "self-test should record probes");
        // For every captured probe, at least one of the default geo
        // headers must be present and equal to the configured IP.
        let geo_headers: std::collections::HashSet<&str> =
            ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"].into_iter().collect();
        for c in captures.iter() {
            let has_geo = c
                .request_headers
                .iter()
                .any(|(k, v)| geo_headers.contains(k.as_str()) && v == "203.0.113.5");
            assert!(
                has_geo,
                "probe `{}` is missing the geo IP header; got headers: {:?}",
                c.label, c.request_headers
            );
        }
    }

    /// Round 25 (k) — operations with a JSON request body now get four
    /// content-type-swap probes (xml / yaml / multipart / urlencoded).
    /// Verify they:
    ///   1. fire only when the operation declares a JSON body
    ///   2. carry the wrong Content-Type the probe is testing for
    ///   3. don't fire on body-less operations
    #[tokio::test]
    async fn content_type_swap_probes_fire_for_json_bodies() {
        let sink = capture_sink();
        let cfg = SelfTestConfig {
            capture: Some(Arc::clone(&sink)),
            ..dead_target_cfg(50)
        };
        let ops = vec![
            op("POST", "/users", Some("{\"name\":\"a\"}"), vec![], vec![], vec![]),
            op("GET", "/ping", None, vec![], vec![], vec![]),
        ];
        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
        let captures = sink.lock().unwrap();

        let swap_labels: Vec<&str> = captures
            .iter()
            .filter(|c| c.label.starts_with("request-body:content-type-mismatch:"))
            .map(|c| c.label.as_str())
            .collect();
        assert_eq!(
            swap_labels.len(),
            4,
            "expected 4 content-type-swap probes (one per variant), got: {swap_labels:?}"
        );
        let expected_labels = [
            "request-body:content-type-mismatch:xml",
            "request-body:content-type-mismatch:yaml",
            "request-body:content-type-mismatch:multipart",
            "request-body:content-type-mismatch:urlencoded",
        ];
        for want in expected_labels {
            assert!(swap_labels.contains(&want), "missing swap probe `{want}`");
        }

        // Each swap probe must carry the wrong Content-Type it's
        // testing for — that's the whole point.
        for c in captures.iter() {
            let Some(suffix) = c.label.strip_prefix("request-body:content-type-mismatch:") else {
                continue;
            };
            let want_ct = match suffix {
                "xml" => "application/xml",
                "yaml" => "application/yaml",
                "multipart" => "multipart/form-data",
                "urlencoded" => "application/x-www-form-urlencoded",
                _ => continue,
            };
            assert_eq!(content_type_of(c), want_ct, "swap probe `{}` sent wrong CT", c.label);
        }

        // The body-less operation must NOT produce content-type-swap
        // probes (no body → no content type to lie about).
        let body_less_swaps = captures
            .iter()
            .filter(|c| {
                c.label.starts_with("request-body:content-type-mismatch:")
                    && c.url.ends_with("/ping")
            })
            .count();
        assert_eq!(
            body_less_swaps, 0,
            "GET /ping has no request body; should not produce content-type-swap probes"
        );
    }

    /// Round 27 (k variant b) — Srikanth's round-23 follow-up on (k):
    /// JSON envelope with embedded non-JSON field values. For each
    /// JSON-body operation, four extra probes fire that send valid
    /// JSON with an XML/YAML/multipart/urlencoded snippet stuffed
    /// into a string field. Content-Type stays `application/json`;
    /// expected is 2xx-3xx (the body parses); a 5xx flags a server
    /// that crashed on the embedded content.
    #[tokio::test]
    async fn embedded_content_probes_fire_with_honest_content_type() {
        let sink = capture_sink();
        let cfg = SelfTestConfig {
            capture: Some(Arc::clone(&sink)),
            ..dead_target_cfg(50)
        };
        let ops = vec![op(
            "POST",
            "/users",
            Some("{\"name\":\"alice\",\"age\":30}"),
            vec![],
            vec![],
            vec![],
        )];
        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
        let captures = sink.lock().unwrap();
        let embedded: Vec<&CaseCapture> = captures
            .iter()
            .filter(|c| c.label.starts_with("request-body:embedded-content:"))
            .collect();
        assert_eq!(
            embedded.len(),
            4,
            "expected 4 embedded-content probes, got: {:?}",
            embedded.iter().map(|c| &c.label).collect::<Vec<_>>()
        );
        // Every embedded probe must carry the honest application/json
        // Content-Type (NOT lie like the variant-a content-type-swap
        // probes do) and a request body that still parses as JSON.
        for &c in &embedded {
            let ct = content_type_of(c);
            assert!(
                ct.contains("application/json"),
                "embedded probe `{}` should keep Content-Type honest, got {ct}",
                c.label
            );
            let body = c.request_body.as_deref().unwrap_or("");
            assert!(
                serde_json::from_str::<serde_json::Value>(body).is_ok(),
                "embedded probe `{}` body should still be valid JSON, got: {body}",
                c.label
            );
        }
    }

    /// `embed_payload_in_first_string_field` walks objects depth-first
    /// and replaces only the FIRST string-valued leaf, leaving the
    /// surrounding structure intact.
    #[test]
    fn embed_payload_replaces_first_string_only() {
        let sample = r#"{"name":"alice","age":30,"tags":["admin","user"]}"#;
        let mutated = embed_payload_in_first_string_field(sample, "<x/>");
        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
        assert_eq!(v["name"], serde_json::json!("<x/>"));
        // age stays an integer (not stringified by the mutation).
        assert_eq!(v["age"], serde_json::json!(30));
        // tags array's strings stay untouched (we only replace the
        // first encountered string leaf, depth-first).
        assert_eq!(v["tags"][0], serde_json::json!("admin"));
        assert_eq!(v["tags"][1], serde_json::json!("user"));
    }

    /// When the sample has NO string field, the helper falls back to
    /// `{"data": "<snippet>"}` so the probe still has something to
    /// POST. The fallback must produce valid JSON regardless of what
    /// characters the snippet contains.
    #[test]
    fn embed_payload_falls_back_when_no_string_field() {
        let no_strings = r#"{"a":1,"b":[2,3]}"#;
        let mutated = embed_payload_in_first_string_field(no_strings, "<x><y></y></x>");
        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
        assert_eq!(v["data"], serde_json::json!("<x><y></y></x>"));
    }

    /// A sample that is not JSON at all takes the same `data`
    /// fallback as a sample without string fields.
    #[test]
    fn embed_payload_handles_invalid_json_sample() {
        let not_json = "garbage";
        let mutated = embed_payload_in_first_string_field(not_json, "a=1&b=2");
        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
        assert_eq!(v["data"], serde_json::json!("a=1&b=2"));
    }

    /// Round 26 — Srikanth saw `at /: Type { kind: Single` in his
    /// 0.3.169 capture for the vCenter `infraprofile/configs` 202
    /// response (spec promised `type: string`, server returned a
    /// JSON object). The output was a broken-syntax debug string.
    /// This test reproduces his exact spec+body and asserts the
    /// message is readable.
    #[test]
    fn response_schema_error_message_is_readable() {
        let schema = serde_json::json!({"type": "string"});
        let body = r#"{"data":{},"id":"generated_id","status":"created"}"#;
        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
        // The message must NOT contain Rust debug syntax leftovers
        // ("Type { kind:", trailing "{" or "(" tokens). It SHOULD say
        // what type was expected and at which location.
        assert!(!err.contains("Type { kind"), "stale debug output: {err}");
        assert!(!err.contains("{ kind:"), "stale debug output: {err}");
        assert!(err.contains("string"), "should name expected type: {err}");
        assert!(err.contains("at /"), "should include instance path: {err}");
    }

    #[test]
    fn response_schema_error_required_field_is_readable() {
        let schema = serde_json::json!({
            "type": "object",
            "required": ["id"],
            "properties": {"id": {"type": "integer"}}
        });
        let body = r#"{"other": 1}"#;
        let err = validate_body_against_schema(body, &schema).expect("required-missing fires");
        assert!(err.contains("required field missing"), "{err}");
        assert!(err.contains("id"), "{err}");
    }

    #[test]
    fn response_schema_error_none_on_match() {
        let schema = serde_json::json!({"type": "string"});
        assert_eq!(validate_body_against_schema("\"hello\"", &schema), None);
    }

    #[test]
    fn json_serialises_report() {
        let r = SelfTestReport {
            positive_pass: 1,
            positive_fail: 0,
            negative_caught: BTreeMap::new(),
            negative_missed: BTreeMap::new(),
            operations: vec![OperationResult {
                method: "GET".into(),
                path: "/x".into(),
                positive: Some(CaseOutcome {
                    label: "positive".into(),
                    expected_4xx: false,
                    actual_status: 200,
                    passed: true,
                }),
                negatives: Vec::new(),
            }],
        };
        let json = serde_json::to_value(&r).expect("serialises");
        assert_eq!(json["positive_pass"], serde_json::json!(1));
        assert_eq!(json["operations"][0]["positive"]["actual_status"], serde_json::json!(200));
    }
}