Skip to main content

mockforge_bench/conformance/
self_test.rs

1//! Positive + per-category negative request driver against a live server.
2//!
3//! Issue #79 round 13 (4) — Srikanth's (e) ask: a way to test both
4//! positive and negative compliance scenarios separately, where the
5//! positive cases should pass and the negative cases should be
6//! rejected.
7//!
8//! This module sits *alongside* the existing conformance executor
9//! (which drives k6 / native checks on a single positive call per
10//! operation). The self-test driver synthesises per-category
11//! deliberately-bad requests and asserts that the server actually
12//! rejects them with a 4xx — useful when verifying that
13//! `validate_request_with_all` is wired correctly for the user's spec
14//! (the exact gap that round-13 (3) fixed).
15//!
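//! Scope of the initial MVP: the highest-signal negatives — empty body
//! when one is required, missing required query/header params, and
//! wrong-type path params. It does not try to mutate every field of a
//! JSON-Schema-validated body. Later rounds layered more probes on top:
//! schema-driven body mutations (capped per operation by
//! `SCHEMA_MUTATION_CAP`), content-type swaps, embedded non-JSON content
//! inside a JSON envelope, a URI-length probe, and security-scheme probes.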
20
21use super::spec_driven::{AnnotatedOperation, ApiKeyLocation, SecuritySchemeInfo};
22use reqwest::{Client, Method};
23use std::collections::BTreeMap;
24use std::net::IpAddr;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
/// Maximum number of body bytes kept per direction (request or response)
/// when a probe is recorded for `conformance-self-test-requests.jsonl`
/// (round 23, c-iii).
///
/// At 16 KiB per direction a 1000-case run stays under ~32 MB even when
/// every payload hits the cap, while a typical JSON body (or a
/// stack-trace error response) still fits well enough to debug from.
const CAPTURE_BODY_CAP_BYTES: usize = 16 * 1024;
35
/// Upper bound on schema-driven negative probes sent per operation
/// (round 17.2).
///
/// A body schema with 100 properties can yield hundreds of mutations for
/// one operation; multiplied by thousands of operations that becomes a
/// runaway test matrix. Twelve is enough for the highest-signal mutations
/// (type mismatch, required-field removal, a few constraint breaks)
/// without blowing up wall time on large specs.
const SCHEMA_MUTATION_CAP: usize = 12;
43
/// Content-type swap probes (round 25, k).
///
/// Each entry is `(content_type, probe_label)`. For an operation that
/// declares a JSON request body, every entry yields one probe that keeps
/// the JSON payload but advertises the listed `Content-Type` instead. A
/// spec-compliant server should answer 415 (or 400).
///
/// Entries follow the order from Srikanth's round-23 reply — XML, YAML,
/// multipart — followed by the URL-encoded variant he added in round 24.
const CONTENT_TYPE_SWAP_VARIANTS: &[(&str, &str)] = &[
    ("application/xml", "request-body:content-type-mismatch:xml"),
    ("application/yaml", "request-body:content-type-mismatch:yaml"),
    ("multipart/form-data", "request-body:content-type-mismatch:multipart"),
    ("application/x-www-form-urlencoded", "request-body:content-type-mismatch:urlencoded"),
];
59
/// Embedded-content payloads (round 27, k variant b).
///
/// Each entry is `(probe_label, snippet)`. The probe keeps
/// `Content-Type: application/json` and a valid JSON envelope; only a
/// string field's value is replaced with the non-JSON snippet. The aim is
/// to surface servers that try to parse string field contents (e.g.
/// XML-EE expanders, YAML loaders, urlencoded parsers) and crash on the
/// payload — a 5xx here is the finding.
const EMBEDDED_CONTENT_VARIANTS: &[(&str, &str)] = &[
    (
        "request-body:embedded-content:xml",
        "<root><cmd>execute()</cmd></root>",
    ),
    (
        "request-body:embedded-content:yaml",
        "key: value\n- item1\n- item2",
    ),
    (
        "request-body:embedded-content:multipart",
        "--boundary\r\nContent-Disposition: form-data; name=\"x\"\r\n\r\nval\r\n--boundary--",
    ),
    (
        "request-body:embedded-content:urlencoded",
        "a=1&b=2&c=hello%20world",
    ),
];
75
/// Configuration for a self-test run.
///
/// Passed by reference to `run_self_test`; `Default` supplies a
/// local-development baseline (see the `Default` impl for the values).
#[derive(Debug, Clone)]
pub struct SelfTestConfig {
    /// Base URL of the server under test. Every probe URL is built from
    /// it (together with `base_path` and the operation's path).
    pub target_url: String,
    /// When true, the HTTP clients are built to accept invalid TLS
    /// certificates.
    pub skip_tls_verify: bool,
    /// Timeout configured on every HTTP client in the pool.
    pub timeout: Duration,
    /// Optional extra headers to attach to every request (e.g. auth).
    pub extra_headers: Vec<(String, String)>,
    /// Delay between requests to avoid hammering the server.
    /// `run_self_test` sleeps for this long after each operation's
    /// probes when the value is non-zero.
    pub delay_between_requests: Duration,
    /// Round 18.1 — base path to prepend to every spec path. When the
    /// spec declares `/users` and the deployed API is served under
    /// `/api`, `--base-path /api` should make the self-test hit
    /// `https://target/api/users` instead of `https://target/users`.
    /// Pre-fix this was ignored entirely and every operation 404'd
    /// (Srikanth's vCenter run on 0.3.152: 1275 positives, 1275 4xx).
    pub base_path: Option<String>,
    /// Round 18.5 — local source IPs to bind outgoing requests to.
    /// Each IP must already be assigned to an interface on the host.
    /// Operations round-robin through the resulting client pool.
    pub source_ips: Vec<IpAddr>,
    /// Round 18.5 — fake source IPs to advertise via forwarded-IP
    /// headers (used to exercise GEODB lookup at the destination).
    /// Rotated per operation.
    pub geo_source_ips: Vec<IpAddr>,
    /// Which forwarded-IP header(s) to populate when `geo_source_ips`
    /// is non-empty. Empty → no-op; default below sets the standard
    /// three-header set.
    pub geo_source_headers: Vec<String>,
    /// Round 23 (c-iii) — when `Some`, every probe captures method, URL,
    /// request headers/body and response status/headers/body into this
    /// sink. Caller drains it after `run_self_test` and writes
    /// `conformance-self-test-requests.jsonl`. None → no capture (zero
    /// extra allocations on the hot path).
    pub capture: Option<Arc<Mutex<Vec<CaseCapture>>>>,
    /// Round 25 — when true, validate every probe's response body
    /// against the spec's response schema for the actual status
    /// returned (closes round 21.3 / Srikanth's a2 / a3 ask). The
    /// validation result lands in `CaseCapture::response_schema_error`
    /// (None → matched, or no schema for that status). Default false:
    /// JSON-Schema validation of large response bodies adds wall-clock
    /// time and the user has to opt in.
    pub validate_response_schemas: bool,
}
120
/// Round 23 (c-iii) — one captured request/response pair, one per
/// probe (positive or negative). Serialised as a JSON line in
/// `conformance-self-test-requests.jsonl`. Headers are kept as
/// `BTreeMap` for stable ordering. Bodies are truncated to
/// `CAPTURE_BODY_CAP_BYTES`; `*_truncated` flags whether more was
/// dropped.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseCapture {
    /// Probe label. NOTE(review): presumably the same label carried by
    /// the matching `CaseOutcome` — confirm where the sink is filled.
    pub label: String,
    /// HTTP method of the probe, as text.
    pub method: String,
    /// URL the probe was sent to.
    pub url: String,
    /// Request headers, keyed by header name (sorted by the map).
    pub request_headers: BTreeMap<String, String>,
    /// Request body as sent, possibly truncated; `None` when absent.
    pub request_body: Option<String>,
    /// True when `request_body` was cut at `CAPTURE_BODY_CAP_BYTES`.
    pub request_body_truncated: bool,
    /// HTTP status code of the response.
    pub response_status: u16,
    /// Response headers, keyed by header name (sorted by the map).
    pub response_headers: BTreeMap<String, String>,
    /// Response body, possibly truncated; `None` when absent.
    pub response_body: Option<String>,
    /// True when `response_body` was cut at `CAPTURE_BODY_CAP_BYTES`.
    pub response_body_truncated: bool,
    /// NOTE(review): presumably a transport-level error message when the
    /// request produced no usable response — confirm against the code
    /// that populates the capture sink.
    pub error: Option<String>,
    /// Round 25 — when `validate_response_schemas` is on and the spec
    /// declares a schema for `response_status`, this carries the
    /// validation message (or None when the body matched, or no schema
    /// was declared for that status). Serialised verbatim in the JSONL
    /// and rendered in the HTML viewer.
    // `default` only affects deserialisation; this type currently derives
    // `Serialize` only, so `skip_serializing_if` is the attribute that
    // shapes the JSONL output here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_schema_error: Option<String>,
    /// Round 28 — Srikanth's "Is it possible to put expected response
    /// code status in both jsonl and jsonl report" ask. Human-readable
    /// expected status range: `"2xx-3xx"` for positive probes,
    /// `"4xx"` for negatives. Lets users `jq` for misses
    /// (`.response_status as $s | .expected_status_range == "4xx"
    /// and ($s < 400 or $s >= 500)`) and powers the HTML viewer's
    /// "show mismatches only" filter.
    #[serde(default)]
    pub expected_status_range: String,
}
157
158impl Default for SelfTestConfig {
159    fn default() -> Self {
160        Self {
161            target_url: "http://localhost:3000".into(),
162            skip_tls_verify: false,
163            timeout: Duration::from_secs(15),
164            extra_headers: Vec::new(),
165            delay_between_requests: Duration::from_millis(0),
166            base_path: None,
167            source_ips: Vec::new(),
168            geo_source_ips: Vec::new(),
169            geo_source_headers: default_geo_source_headers(),
170            capture: None,
171            validate_response_schemas: false,
172        }
173    }
174}
175
176/// Truncate `body` to `CAPTURE_BODY_CAP_BYTES` on a UTF-8 boundary,
177/// returning the trimmed string and whether truncation occurred. Used
178/// for both request and response bodies in the capture sink.
179fn truncate_body_for_capture(body: &str) -> (String, bool) {
180    if body.len() <= CAPTURE_BODY_CAP_BYTES {
181        return (body.to_string(), false);
182    }
183    let mut end = CAPTURE_BODY_CAP_BYTES;
184    while end > 0 && !body.is_char_boundary(end) {
185        end -= 1;
186    }
187    (body[..end].to_string(), true)
188}
189
/// Header names populated by default when a fake source IP is advertised.
///
/// Returns `X-Forwarded-For`, `True-Client-IP` and `CF-Connecting-IP`, in
/// that order. These are the de-facto standard, the Akamai/CloudFront
/// convention and the Cloudflare convention respectively; a real GEODB
/// front-end is expected to prefer them in the opposite order (Cloudflare
/// first). Override via `--geo-source-header` to test a specific stack.
pub fn default_geo_source_headers() -> Vec<String> {
    const NAMES: [&str; 3] = ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"];
    NAMES.iter().map(|name| (*name).to_owned()).collect()
}
203
/// Outcome of a single test case (positive or negative).
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseOutcome {
    /// Probe label, e.g. `"positive"` or `"request-body:empty"`. For
    /// negatives, `run_self_test` uses the text before the first `:` as
    /// the report category.
    pub label: String,
    /// Whether the probe expected a 4xx rejection. NOTE(review):
    /// presumably mirrors the flag the probe was sent with — confirm in
    /// `send_case`.
    pub expected_4xx: bool,
    /// HTTP status code recorded for the probe.
    pub actual_status: u16,
    /// True when the response status matches expectation
    /// (positive → 2xx-3xx, negative → 4xx).
    pub passed: bool,
}
214
/// All cases run against one annotated operation.
#[derive(Debug, Clone, serde::Serialize)]
pub struct OperationResult {
    /// HTTP method of the operation. NOTE(review): presumably copied
    /// from the `AnnotatedOperation` — confirm where this is built.
    pub method: String,
    /// Path of the operation. NOTE(review): presumably the spec path
    /// template rather than the resolved URL — confirm.
    pub path: String,
    /// Outcome of the positive probe, when one was recorded.
    pub positive: Option<CaseOutcome>,
    /// Outcomes of every other probe sent for the operation. This also
    /// holds the embedded-content probes, which are sent expecting a
    /// non-4xx answer.
    pub negatives: Vec<CaseOutcome>,
}
223
/// Summary report rolled up across all operations.
///
/// Built by `run_self_test`; the counters are derived from the `passed`
/// flag of each `CaseOutcome` stored in `operations`.
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct SelfTestReport {
    /// Number of operations whose positive probe passed.
    pub positive_pass: usize,
    /// Number of operations whose positive probe failed.
    pub positive_fail: usize,
    /// Per category: count of negative cases the server correctly
    /// rejected with a 4xx (we caught the spec violation).
    /// The category is the part of the probe label before the first `:`.
    pub negative_caught: BTreeMap<String, usize>,
    /// Per category: count of negative cases that should have been
    /// rejected but came back with a non-4xx (validator gap).
    pub negative_missed: BTreeMap<String, usize>,
    /// Per-operation results, in the order the operations were run.
    pub operations: Vec<OperationResult>,
}
237
238impl SelfTestReport {
239    /// All-pass means every positive case got 2xx-3xx and every
240    /// negative case got 4xx.
241    pub fn all_passed(&self) -> bool {
242        self.positive_fail == 0 && self.negative_missed.values().sum::<usize>() == 0
243    }
244
245    /// Round 18.1 — detect the "self-test target is misconfigured"
246    /// case where every positive failed with the *same* status code.
247    /// The classic example: `--base-path /api` was forgotten so every
248    /// request hits a path the server doesn't know and returns 404.
249    /// Pre-warning, the user saw all-green negative buckets (because
250    /// "missing route" 404s look like "validator rejected") and no
251    /// indication that the run was meaningless. Returns Some(status)
252    /// when ≥10 positives all failed with the same status, else None.
253    pub fn detect_target_misconfiguration(&self) -> Option<u16> {
254        if self.positive_pass > 0 || self.positive_fail < 10 {
255            return None;
256        }
257        let mut seen: Option<u16> = None;
258        for op in &self.operations {
259            let Some(p) = &op.positive else {
260                continue;
261            };
262            if p.passed {
263                return None;
264            }
265            match seen {
266                None => seen = Some(p.actual_status),
267                Some(s) if s != p.actual_status => return None,
268                _ => {}
269            }
270        }
271        seen
272    }
273
274    /// Human-readable summary string. One line for positives, one per
275    /// category for negatives. Designed to slot into existing
276    /// `TerminalReporter` output.
277    pub fn render_summary(&self) -> String {
278        let mut out = String::new();
279        out.push_str(&format!(
280            "Positives: {} pass / {} fail\n",
281            self.positive_pass, self.positive_fail
282        ));
283        let mut keys: Vec<&String> =
284            self.negative_caught.keys().chain(self.negative_missed.keys()).collect();
285        keys.sort();
286        keys.dedup();
287        for cat in keys {
288            let caught = self.negative_caught.get(cat).copied().unwrap_or(0);
289            let missed = self.negative_missed.get(cat).copied().unwrap_or(0);
290            let mark = if missed == 0 { "✓" } else { "⚠" };
291            out.push_str(&format!(
292                "Negatives [{}]: {} caught / {} missed  {}\n",
293                cat, caught, missed, mark
294            ));
295        }
296        out
297    }
298}
299
300/// Execute the self-test plan against `config.target_url` for every
301/// `AnnotatedOperation`. Returns the aggregated report; callers
302/// decide how to display it (e.g. via `render_summary` or by writing
303/// the JSON serialisation to disk).
304pub async fn run_self_test(
305    operations: &[AnnotatedOperation],
306    config: &SelfTestConfig,
307) -> Result<SelfTestReport, reqwest::Error> {
308    // Round 18.5 — build a client pool when `source_ips` is set,
309    // one reqwest::Client per IP, each bound to its local address.
310    // Operations round-robin through the pool. Empty pool → single
311    // default client (the pre-18.5 behaviour).
312    let clients = build_client_pool(config)?;
313    let client_cursor = AtomicUsize::new(0);
314    let geo_cursor = AtomicUsize::new(0);
315
316    let mut report = SelfTestReport::default();
317    for op in operations {
318        let client_idx = client_cursor.fetch_add(1, Ordering::Relaxed) % clients.len();
319        let client = &clients[client_idx];
320        let geo_ip = if config.geo_source_ips.is_empty() {
321            None
322        } else {
323            let idx = geo_cursor.fetch_add(1, Ordering::Relaxed) % config.geo_source_ips.len();
324            Some(config.geo_source_ips[idx])
325        };
326        let result = test_operation(client, config, op, geo_ip).await;
327        if let Some(p) = &result.positive {
328            if p.passed {
329                report.positive_pass += 1;
330            } else {
331                report.positive_fail += 1;
332            }
333        }
334        for neg in &result.negatives {
335            let cat = neg.label.split(':').next().unwrap_or("other").to_string();
336            if neg.passed {
337                *report.negative_caught.entry(cat).or_insert(0) += 1;
338            } else {
339                *report.negative_missed.entry(cat).or_insert(0) += 1;
340            }
341        }
342        report.operations.push(result);
343        if !config.delay_between_requests.is_zero() {
344            tokio::time::sleep(config.delay_between_requests).await;
345        }
346    }
347    Ok(report)
348}
349
/// Round 18.5 — combine an operation's declared headers with the GEODB
/// forwarded-IP headers.
///
/// The result always starts with a copy of `base`. When `geo_ip` is
/// `Some`, each name in `geo_headers` is appended with the IP's textual
/// form as its value — unless a header of that name (compared
/// ASCII-case-insensitively) is already present, in which case the
/// existing value is kept: the caller's spec wins. With `geo_ip == None`
/// or an empty `geo_headers` the copy of `base` is returned as is.
fn effective_op_headers(
    base: &[(String, String)],
    geo_ip: Option<IpAddr>,
    geo_headers: &[String],
) -> Vec<(String, String)> {
    let mut merged: Vec<(String, String)> = base.iter().cloned().collect();
    if let Some(addr) = geo_ip {
        let rendered = addr.to_string();
        for name in geo_headers {
            let already_present =
                merged.iter().any(|(existing, _)| existing.eq_ignore_ascii_case(name));
            if !already_present {
                merged.push((name.clone(), rendered.clone()));
            }
        }
    }
    merged
}
377
378/// Round 18.5 — build a pool of reqwest clients, one per declared
379/// source IP. Empty `source_ips` → a single default client.
380///
381/// The OS must already have each `source_ip` assigned to an
382/// interface; reqwest's `.local_address()` issues a `bind()` syscall
383/// at connect time, so an IP the kernel doesn't recognise surfaces
384/// as `EADDRNOTAVAIL` at request time, not at builder time.
385fn build_client_pool(config: &SelfTestConfig) -> Result<Vec<Client>, reqwest::Error> {
386    let make = |bind: Option<IpAddr>| -> Result<Client, reqwest::Error> {
387        let mut builder = Client::builder().timeout(config.timeout);
388        if config.skip_tls_verify {
389            builder = builder.danger_accept_invalid_certs(true);
390        }
391        if let Some(addr) = bind {
392            builder = builder.local_address(addr);
393        }
394        builder.build()
395    };
396    if config.source_ips.is_empty() {
397        Ok(vec![make(None)?])
398    } else {
399        config.source_ips.iter().map(|ip| make(Some(*ip))).collect()
400    }
401}
402
403async fn test_operation(
404    client: &Client,
405    config: &SelfTestConfig,
406    op: &AnnotatedOperation,
407    geo_ip: Option<IpAddr>,
408) -> OperationResult {
409    // Round 25 — track the sink length BEFORE we run any probes for
410    // this operation, so that after the probes finish we can mutate
411    // exactly the entries that belong to this op (the capture sink is
412    // shared but `run_self_test` iterates operations sequentially).
413    // Used by the response-schema validation pass below.
414    let sink_start = config.capture.as_ref().and_then(|s| s.lock().ok().map(|g| g.len()));
415
416    let url = build_url_with_base(
417        &config.target_url,
418        config.base_path.as_deref(),
419        &op.path,
420        &op.path_params,
421    );
422    let method = Method::from_bytes(op.method.to_uppercase().as_bytes()).unwrap_or(Method::GET);
423
424    // Round 18.5 — pre-compute the operation's effective headers
425    // with the geo source IP baked in. Doing it once here keeps the
426    // per-case `send_case` calls below unchanged. When `geo_ip` is
427    // None the result equals `op.header_params`.
428    let op_headers = effective_op_headers(&op.header_params, geo_ip, &config.geo_source_headers);
429
430    // ── Positive case ────────────────────────────────────────────
431    let positive = send_case(
432        client,
433        config,
434        method.clone(),
435        &url,
436        "positive",
437        false,
438        op.sample_body.as_deref(),
439        op.query_params.clone(),
440        op_headers.clone(),
441    )
442    .await;
443
444    // ── Negative cases ───────────────────────────────────────────
445    let mut negatives = Vec::new();
446
447    // (a) empty body when one is required.
448    //
449    // Round 16 — drop the `sample_body.is_some()` precondition. Operations
450    // whose body annotator couldn't synthesize a sample previously got
451    // zero negatives (so the self-test reported "all passing" even on
452    // POST /resource with a required body). The spec saying the operation
453    // *has* a request body is enough — an empty object is a valid
454    // negative regardless of whether we have a positive sample.
455    if op.request_body_content_type.is_some() {
456        negatives.push(
457            send_case(
458                client,
459                config,
460                method.clone(),
461                &url,
462                "request-body:empty",
463                true,
464                Some("{}"),
465                op.query_params.clone(),
466                op_headers.clone(),
467            )
468            .await,
469        );
470
471        // (b) wrong-shaped body (array instead of object) — exercises
472        // top-level type validation independently of which fields are
473        // required.
474        negatives.push(
475            send_case(
476                client,
477                config,
478                method.clone(),
479                &url,
480                "request-body:wrong-type",
481                true,
482                Some("[]"),
483                op.query_params.clone(),
484                op_headers.clone(),
485            )
486            .await,
487        );
488
489        // Round 25 (k) — content-type swap probes.
490        //
491        // For operations declaring `application/json` request bodies, send
492        // the SAME json payload (or a synthesised one) under four other
493        // content types: `application/xml`, `application/yaml`,
494        // `multipart/form-data`, `application/x-www-form-urlencoded`.
495        // The spec says the endpoint accepts only JSON, so a strict server
496        // should respond 415 Unsupported Media Type (or 400 if it tries
497        // to parse and fails). A 2xx means the server is accepting
498        // payloads outside its declared content negotiation, which is the
499        // failure mode behind a lot of "we crashed on a malformed XML
500        // upload" incidents.
501        //
502        // Variant (a) of Srikanth's round-23 g ask: lie about the
503        // Content-Type header. The body shape is honest JSON; only the
504        // header is swapped. Variant (b) (JSON envelope with embedded
505        // non-JSON field values) is deferred to round 26 because it
506        // requires a schema-aware field walker.
507        if op
508            .request_body_content_type
509            .as_deref()
510            .map(|ct| ct.contains("json"))
511            .unwrap_or(false)
512        {
513            let payload = op.sample_body.as_deref().unwrap_or("{}");
514            for (ct, label) in CONTENT_TYPE_SWAP_VARIANTS {
515                negatives.push(
516                    send_case_with_extra(
517                        client,
518                        config,
519                        method.clone(),
520                        &url,
521                        label,
522                        true,
523                        Some(payload),
524                        op.query_params.clone(),
525                        // Strip any Content-Type already on the operation
526                        // headers (the spec's positive value) so the
527                        // probe's value is the only one the server sees.
528                        op_headers
529                            .iter()
530                            .filter(|(k, _)| !k.eq_ignore_ascii_case("content-type"))
531                            .cloned()
532                            .collect(),
533                        // The wrong Content-Type rides on `extra_headers`
534                        // so it lands AFTER `send_case_with_extra`'s
535                        // unconditional `application/json` insertion in
536                        // request-body mode. Actually `send_case_with_extra`
537                        // only sets Content-Type when a body is present
538                        // AND there's no manual override; passing the
539                        // override here wins because reqwest preserves
540                        // the last-set header value.
541                        vec![("Content-Type".to_string(), (*ct).to_string())],
542                    )
543                    .await,
544                );
545            }
546
547            // Round 27 (k variant b) — embedded non-JSON content
548            // inside a valid JSON envelope. Content-Type stays
549            // application/json (honest) and the body parses as JSON;
550            // only the string-valued payload changes. We expect 2xx-3xx
551            // because the envelope is spec-shape, so the probe surfaces
552            // servers that crash (5xx) trying to parse the embedded
553            // snippet as XML/YAML/etc. A 4xx is also a finding because
554            // it usually means the server's pattern/format validator
555            // tripped on the payload contents, but the user can decide
556            // from the JSONL whether that's a bug or correct narrow-
557            // string-field behaviour.
558            for (label, snippet) in EMBEDDED_CONTENT_VARIANTS {
559                let payload = op.sample_body.as_deref().unwrap_or("{}");
560                let body = embed_payload_in_first_string_field(payload, snippet);
561                negatives.push(
562                    send_case(
563                        client,
564                        config,
565                        method.clone(),
566                        &url,
567                        label,
568                        // expected_4xx=false: any non-2xx is a probe
569                        // failure. 5xx in particular is "server panicked
570                        // on the embedded content".
571                        false,
572                        Some(&body),
573                        op.query_params.clone(),
574                        op_headers.clone(),
575                    )
576                    .await,
577                );
578            }
579        }
580
581        // Round 17.2 — schema-aware negatives.
582        //
583        // When both a positive sample AND the resolved body schema are
584        // available, mutate the sample per-field (type mismatch,
585        // min/max bounds, pattern, enum out-of-range, required-field
586        // removal) and assert each is rejected with 4xx. Capped at
587        // SCHEMA_MUTATION_CAP per operation so a 100-property body
588        // doesn't explode the test matrix.
589        if let (Some(sample_str), Some(schema)) =
590            (op.sample_body.as_deref(), op.request_body_schema.as_ref())
591        {
592            if let Ok(sample) = serde_json::from_str::<serde_json::Value>(sample_str) {
593                let mutations = super::schema_mutator::mutate_body(&sample, schema);
594                for m in mutations.into_iter().take(SCHEMA_MUTATION_CAP) {
595                    let body_str = serde_json::to_string(&m.body).unwrap_or_default();
596                    negatives.push(
597                        send_case(
598                            client,
599                            config,
600                            method.clone(),
601                            &url,
602                            &m.label,
603                            true,
604                            Some(&body_str),
605                            op.query_params.clone(),
606                            // Round 24 (f) — was `op.header_params`, which
607                            // skipped the geo-IP header. Use `op_headers`
608                            // so the geo IP rides with the negative probe
609                            // too (positive vs negative coverage must be
610                            // symmetric, otherwise a GEODB front-end sees
611                            // the rotating IP only on positives).
612                            op_headers.clone(),
613                        )
614                        .await,
615                    );
616                }
617            }
618        }
619    }
620
621    // Round 17.2 — URI-length probe. Spec-agnostic but schema-aware in
622    // spirit: most servers cap URIs at 8 KB or so. Append a 9 KB query
623    // string to the URL and expect 414 URI Too Long (or 400). Skipped
624    // for operations that already have a heavy positive query.
625    {
626        let pad = "p=".to_string() + &"x".repeat(9_000);
627        let bad_url = if url.contains('?') {
628            format!("{url}&{pad}")
629        } else {
630            format!("{url}?{pad}")
631        };
632        negatives.push(
633            send_case(
634                client,
635                config,
636                method.clone(),
637                &bad_url,
638                "parameters:uri-too-long",
639                true,
640                op.sample_body.as_deref(),
641                op.query_params.clone(),
642                // Round 24 (f) — see schema-mutation note above. Use
643                // `op_headers` (carries geo IP) instead of bare
644                // `op.header_params`.
645                op_headers.clone(),
646            )
647            .await,
648        );
649    }
650
651    // (e) Round 16 — path-param type probe. Send the first path
652    // parameter as a literal `"self-test-invalid-id"`: a string that
653    // contains hyphens, won't parse as an integer, won't parse as a
654    // UUID, and won't match any typical regex pattern. Operations
655    // whose spec types the param as `integer` or `string` with a
656    // `format`/`pattern` will catch this (caught: server returned
657    // 4xx); operations whose spec lets path params be free-form
658    // strings will let it through (missed: server returned 2xx).
659    // Either outcome is informative: a category that's all "missed"
660    // tells the user their spec is loose on path-param types, which
661    // is itself worth knowing. Addresses Srikanth's "always all
662    // passing" report — operations with a path param now produce at
663    // least one probe instead of zero.
664    if !op.path_params.is_empty() {
665        let mut url_with_placeholder = op.path.clone();
666        if let Some((first_name, _)) = op.path_params.first() {
667            // Substitute every other path-param with its sample so the
668            // route shape stays intact and only the first param is bad.
669            for (name, value) in op.path_params.iter().skip(1) {
670                if !value.is_empty() {
671                    url_with_placeholder =
672                        url_with_placeholder.replace(&format!("{{{name}}}"), value);
673                }
674            }
675            // Substitute the first param with a guaranteed-invalid
676            // sentinel that's unlikely to match any reasonable schema:
677            // contains characters disallowed in numeric IDs *and* UUIDs.
678            url_with_placeholder =
679                url_with_placeholder.replace(&format!("{{{first_name}}}"), "self-test-invalid-id");
680            // Round 18.1 — honour `base_path` here too, otherwise the
681            // probe URL differs from the positive case and the
682            // resulting 404 is misattributed to "bad path param".
683            let bad_url = build_url_with_base(
684                &config.target_url,
685                config.base_path.as_deref(),
686                &url_with_placeholder,
687                &[],
688            );
689            negatives.push(
690                send_case(
691                    client,
692                    config,
693                    method.clone(),
694                    &bad_url,
695                    "parameters:bad-path-param",
696                    true,
697                    op.sample_body.as_deref(),
698                    op.query_params.clone(),
699                    op_headers.clone(),
700                )
701                .await,
702            );
703        }
704    }
705
706    // (c) drop the first required query param
707    if !op.query_params.is_empty() {
708        let mut q = op.query_params.clone();
709        q.remove(0);
710        negatives.push(
711            send_case(
712                client,
713                config,
714                method.clone(),
715                &url,
716                "parameters:missing-query",
717                true,
718                op.sample_body.as_deref(),
719                q,
720                op_headers.clone(),
721            )
722            .await,
723        );
724    }
725
726    // (s) Round 17.3 — security probes.
727    //
728    // Operations whose spec declares a security requirement get a
729    // dedicated set of negatives. The point isn't to test whether the
730    // server's *real* auth works (the positive case already does that
731    // via `extra_headers`) — it's to check whether deliberately-bad
732    // credentials are still rejected, which is exactly the failure
733    // mode that lets an attacker through a half-wired validator.
734    //
735    // Each probe replaces or omits the relevant auth credential and
736    // expects 401 / 403. A 2xx here is a hard finding: "spec says
737    // this endpoint is protected, server let unauthenticated /
738    // wrong-credential traffic through".
739    //
740    // Bounded: at most one probe per declared scheme kind, so an
741    // operation with 3 security requirements doesn't 4× the request
742    // volume. Skips entirely when `op.security_schemes` is empty.
743    for probe in build_security_probes(&op.security_schemes) {
744        // Strip any pre-existing Authorization or known API-key
745        // header from extra_headers + header_params so the probe
746        // value is the *only* credential the server sees.
747        let stripped_extra = strip_auth(&config.extra_headers, &op.security_schemes);
748        let stripped_headers = strip_auth(&op.header_params, &op.security_schemes);
749        let stripped_query = strip_auth_query(&op.query_params, &op.security_schemes);
750        let mut req_headers = stripped_headers;
751        for (k, v) in &probe.headers {
752            req_headers.push((k.clone(), v.clone()));
753        }
754        // Round 24 (f) — security probes build req_headers from
755        // `op.header_params` directly (we need the stripped-auth
756        // variant), so the geo-IP header doesn't ride along
757        // automatically. Append it here so a GEODB / WAF in front
758        // of the auth layer still sees the rotating source IP.
759        if let Some(ip) = geo_ip {
760            let ip_str = ip.to_string();
761            for h in &config.geo_source_headers {
762                let already = req_headers.iter().any(|(k, _)| k.eq_ignore_ascii_case(h));
763                if !already {
764                    req_headers.push((h.clone(), ip_str.clone()));
765                }
766            }
767        }
768        let mut req_query = stripped_query;
769        for (k, v) in &probe.query {
770            req_query.push((k.clone(), v.clone()));
771        }
772        negatives.push(
773            send_case_with_extra(
774                client,
775                config,
776                method.clone(),
777                &url,
778                &probe.label,
779                true,
780                op.sample_body.as_deref(),
781                req_query,
782                req_headers,
783                stripped_extra,
784            )
785            .await,
786        );
787    }
788
789    // (d) drop the first required header
790    if !op.header_params.is_empty() {
791        // Round 24 (f) — start from `op_headers` (so the geo IP rides
792        // along) and only strip the first OPERATION-declared header.
793        // Slicing past `op.header_params.len()` would otherwise risk
794        // dropping the geo header itself; `op_headers` is built as
795        // `op.header_params ++ geo` so index 0 is always operational.
796        let mut h = op_headers.clone();
797        if !h.is_empty() {
798            h.remove(0);
799        }
800        negatives.push(
801            send_case(
802                client,
803                config,
804                method.clone(),
805                &url,
806                "parameters:missing-header",
807                true,
808                op.sample_body.as_deref(),
809                op.query_params.clone(),
810                h,
811            )
812            .await,
813        );
814    }
815
816    // (w) Round 17.5 — OWASP/WAF unification.
817    //
818    // Pull one canonical payload per OWASP category from the existing
819    // `SecurityPayloads` library and emit an injection probe per
820    // category. Targets in priority order: (1) substitute the first
821    // query param's value, (2) substitute the first string field of
822    // the positive JSON body, (3) skip if neither is available.
823    //
824    // Label format `owasp:<category>`, so the existing
825    // `negative_caught` / `negative_missed` rollup groups all OWASP
826    // findings under one `owasp` bucket. Expected 4xx (server should
827    // reject malicious input). A 5xx is a hard finding (server
828    // crashed on the payload); a 2xx is a soft finding (input passed
829    // through unfiltered — may or may not be a real vuln).
830    //
831    // Bounded: at most one probe per category (7 categories total).
832    // Skips the operation entirely if no injection target is
833    // available — open GET endpoints with no params get zero OWASP
834    // probes, no false signal.
835    for probe in build_owasp_probes(op) {
836        negatives.push(
837            send_case(
838                client,
839                config,
840                method.clone(),
841                &url,
842                &probe.label,
843                true,
844                probe.body.as_deref(),
845                probe.query,
846                // Round 24 (f) — OWASP injection probes must also
847                // carry the geo IP, otherwise a WAF / GEODB rule
848                // tuned to a specific source IP would silently let
849                // them through.
850                op_headers.clone(),
851            )
852            .await,
853        );
854    }
855
856    // Round 25 — response-body shape validation pass. For each capture
857    // this op pushed onto the sink, look up the spec's schema for the
858    // actual response status and validate. Result lands in
859    // `response_schema_error` (Some(message) on failure, None on
860    // pass or no-schema-for-this-status). Runs only when the user
861    // opted in AND capture is on (we need the body).
862    if config.validate_response_schemas {
863        if let (Some(sink), Some(start)) = (config.capture.as_ref(), sink_start) {
864            if !op.response_schemas.is_empty() {
865                if let Ok(mut guard) = sink.lock() {
866                    let end = guard.len();
867                    for i in start..end {
868                        let Some(entry) = guard.get_mut(i) else {
869                            continue;
870                        };
871                        let Some(body) = entry.response_body.as_deref() else {
872                            continue;
873                        };
874                        let Some(schema) = op.response_schemas.get(&entry.response_status) else {
875                            continue;
876                        };
877                        entry.response_schema_error = validate_body_against_schema(body, schema);
878                    }
879                }
880            }
881        }
882    }
883
884    OperationResult {
885        method: op.method.clone(),
886        path: op.path.clone(),
887        positive: Some(positive),
888        negatives,
889    }
890}
891
// Round 25 — NOTE: this paragraph documents `validate_body_against_schema`
// (defined further down in this file). It is kept as a plain comment so
// rustdoc does not attach it to `embed_payload_in_first_string_field`.
//
// Validates a JSON body string against an OpenAPI response schema (already
// converted to a `serde_json::Value`). Returns `Some(message)` describing
// the first violation, or `None` on a clean pass / non-JSON body /
// schema-build failure (in which case the absence of an error means "we
// didn't have anything to compare against", not "passed"; the caller-side
// semantics treat absence as success because that's what the user sees as
// silence).
899/// Round 27 (k variant b) — return a JSON body string identical to
900/// `sample` except that the first string-valued leaf has been
901/// replaced with `snippet`. Walks objects depth-first and stops at
902/// the first string. If `sample` is not parseable JSON, or has no
903/// string fields, falls back to wrapping the snippet under a `data`
904/// key so the probe still has a body to send: `{"data": <snippet>}`.
905/// The result is always valid JSON ready for `application/json`.
906fn embed_payload_in_first_string_field(sample: &str, snippet: &str) -> String {
907    let mut parsed: serde_json::Value = match serde_json::from_str(sample) {
908        Ok(v) => v,
909        Err(_) => return format!(r#"{{"data":{}}}"#, json_quote(snippet)),
910    };
911    if !replace_first_string(&mut parsed, snippet) {
912        return format!(r#"{{"data":{}}}"#, json_quote(snippet));
913    }
914    serde_json::to_string(&parsed)
915        .unwrap_or_else(|_| format!(r#"{{"data":{}}}"#, json_quote(snippet)))
916}
917
918/// Helper for `embed_payload_in_first_string_field`: recursively
919/// walk the value and replace the FIRST string leaf encountered.
920/// Returns true when a replacement happened. Honors document order
921/// for objects (BTreeMap-backed `serde_json::Map` iterates in
922/// insertion order) so the choice of which field to mutate is
923/// stable across runs.
924fn replace_first_string(v: &mut serde_json::Value, snippet: &str) -> bool {
925    match v {
926        serde_json::Value::String(s) => {
927            *s = snippet.to_string();
928            true
929        }
930        serde_json::Value::Object(map) => {
931            for (_k, child) in map.iter_mut() {
932                if replace_first_string(child, snippet) {
933                    return true;
934                }
935            }
936            false
937        }
938        serde_json::Value::Array(arr) => {
939            for child in arr.iter_mut() {
940                if replace_first_string(child, snippet) {
941                    return true;
942                }
943            }
944            false
945        }
946        _ => false,
947    }
948}
949
950/// Helper for `embed_payload_in_first_string_field`'s fallback: take
951/// an arbitrary string and quote it for embedding inside a JSON
952/// literal. `serde_json::to_string(&value)` handles escaping
953/// correctly for unicode + control chars + quotes.
954fn json_quote(s: &str) -> String {
955    serde_json::to_string(s).unwrap_or_else(|_| "\"\"".to_string())
956}
957
958fn validate_body_against_schema(body: &str, schema: &serde_json::Value) -> Option<String> {
959    let parsed: serde_json::Value = serde_json::from_str(body).ok()?;
960    let validator = jsonschema::validator_for(schema).ok()?;
961    let mut errors = validator.iter_errors(&parsed);
962    let first = errors.next()?;
963    // Round 28 — Srikanth on 0.3.170 wanted the message to show the
964    // actual expected schema alongside the kind label so it reads as
965    // "expected schema {...} but got <kind>". We emit a compact JSON
966    // serialisation of the schema as a suffix; the kind label still
967    // names what went wrong in plain English for quick scanning.
968    // Round 26 — Srikanth on 0.3.169: the prior `format!("{:?}", first.kind)
969    // .split('(').next()` produced "Type { kind: Single" (broken Rust
970    // syntax, mismatched braces). Switch to the human-readable mapping
971    // already used in executor.rs: handle the common kinds (Type,
972    // Required, AdditionalProperties, Enum, MinLength, MaxLength,
973    // Minimum, Maximum, Pattern) explicitly; fall back to the
974    // jsonschema crate's Display impl on the error (which produces
975    // something like "{...} is not of type \"string\"") for the long
976    // tail. Combined with `at <instance-path>` for the field location.
977    let path = first.instance_path.to_string();
978    let path = if path.is_empty() { "/" } else { path.as_str() };
979    // Round 31 — Srikanth on 0.3.174 hit the vCenter case where the
980    // error is "required field missing: comment" but the printed
981    // schema was the WHOLE parent object schema (with descriptions of
982    // every property), not just the missing field's sub-schema. The
983    // jsonschema crate emits `Required` errors with
984    // `instance_path == /` (the parent), so the round-30 sub-schema
985    // walker had no extra info to focus the suffix. Carry the missing
986    // property name out of the kind match so we can descend one more
987    // step into `properties[property]` for the printed schema.
988    let mut required_property: Option<String> = None;
989    let kind_msg: String = match &first.kind {
990        jsonschema::error::ValidationErrorKind::Type { kind } => {
991            // `kind` is `TypeKind::Single(JsonType)` or
992            // `TypeKind::Multiple(JsonTypeSet)`. `JsonType` has its
993            // own `Display` impl ("string", "object", etc.).
994            match kind {
995                jsonschema::error::TypeKind::Single(t) => format!("expected type {t}"),
996                jsonschema::error::TypeKind::Multiple(_) => "expected one of multiple types".into(),
997            }
998        }
999        jsonschema::error::ValidationErrorKind::Required { property } => {
1000            // `property.to_string()` returns the Display of the JSON
1001            // value, which for a string is `"name"` (with quotes).
1002            // Strip them for the lookup; keep them in the human message.
1003            let raw = property.to_string();
1004            let unquoted = raw
1005                .strip_prefix('"')
1006                .and_then(|s| s.strip_suffix('"'))
1007                .unwrap_or(&raw)
1008                .to_string();
1009            required_property = Some(unquoted);
1010            format!("required field missing: {property}")
1011        }
1012        jsonschema::error::ValidationErrorKind::AdditionalProperties { unexpected } => {
1013            format!("unexpected additional properties: {unexpected:?}")
1014        }
1015        jsonschema::error::ValidationErrorKind::Enum { options } => {
1016            format!("value not in allowed enum: {options}")
1017        }
1018        jsonschema::error::ValidationErrorKind::MinLength { limit } => {
1019            format!("string shorter than min length ({limit})")
1020        }
1021        jsonschema::error::ValidationErrorKind::MaxLength { limit } => {
1022            format!("string longer than max length ({limit})")
1023        }
1024        jsonschema::error::ValidationErrorKind::Minimum { limit } => {
1025            format!("value below minimum ({limit})")
1026        }
1027        jsonschema::error::ValidationErrorKind::Maximum { limit } => {
1028            format!("value above maximum ({limit})")
1029        }
1030        jsonschema::error::ValidationErrorKind::Pattern { pattern } => {
1031            format!("value did not match pattern {pattern}")
1032        }
1033        // Long tail: lean on jsonschema's Display impl, which is the
1034        // built-in human-readable error message ("X is not of type Y").
1035        // Strip trailing newlines so the JSONL line stays one line.
1036        _ => first.to_string().trim().to_string(),
1037    };
1038    // Round 30 — Srikanth on 0.3.173 asked how a deeper nested mismatch
1039    // reads. The prior output printed the WHOLE top-level schema even for
1040    // a single-field mismatch, which buried the actual constraint that
1041    // failed. Walk the instance pointer through the schema's properties
1042    // chain and print the most specific sub-schema we can find. Falls
1043    // back to the full schema for paths the walker can't resolve
1044    // (additionalProperties, oneOf, allOf, $ref un-resolved, etc.).
1045    let mut focused_schema = sub_schema_at_pointer(schema, path).unwrap_or_else(|| schema.clone());
1046    // Round 31 — for Required errors, descend one more step into
1047    // `properties[<missing>]` so the printed schema is the missing
1048    // field's own constraint, not the whole parent.
1049    if let Some(prop_name) = required_property.as_ref() {
1050        if let Some(prop_schema) =
1051            focused_schema.get("properties").and_then(|p| p.get(prop_name.as_str()))
1052        {
1053            focused_schema = prop_schema.clone();
1054        }
1055    }
1056    let schema_str = serde_json::to_string(&focused_schema).unwrap_or_else(|_| "<schema>".into());
1057    let schema_str = if schema_str.len() > 300 {
1058        format!("{}...", &schema_str[..300])
1059    } else {
1060        schema_str
1061    };
1062    // Round 29 — Srikanth on 0.3.172 was confused by `at /:` thinking
1063    // it referenced the URL path; it's actually a JSON pointer into
1064    // the RESPONSE BODY. Reword so that's unambiguous: explicit
1065    // "response body" prefix and a human label for the root case.
1066    let location = if path == "/" {
1067        "response body root".to_string()
1068    } else {
1069        format!("response body at {path}")
1070    };
1071    Some(format!("{location}: {kind_msg}; expected schema {schema_str}"))
1072}
1073
1074/// Round 30 — walk a JSON-Pointer-style instance path through a JSON
1075/// Schema and return the sub-schema describing the value at that
1076/// position. For path `/name/age` on
1077/// `{"properties":{"name":{"properties":{"age":{"type":"integer"}}}}}`
1078/// returns `{"type":"integer"}`. Returns `None` for paths the walker
1079/// can't follow (array indices into `items` with no per-index schema,
1080/// `additionalProperties`, `oneOf`/`allOf`, unresolved `$ref`); callers
1081/// should fall back to the full schema in that case.
1082fn sub_schema_at_pointer(schema: &serde_json::Value, pointer: &str) -> Option<serde_json::Value> {
1083    if pointer.is_empty() || pointer == "/" {
1084        return Some(schema.clone());
1085    }
1086    let mut current = schema;
1087    for seg in pointer.trim_start_matches('/').split('/') {
1088        let unescaped = seg.replace("~1", "/").replace("~0", "~");
1089        if let Some(props) = current.get("properties") {
1090            if let Some(sub) = props.get(&unescaped) {
1091                current = sub;
1092                continue;
1093            }
1094        }
1095        if let Some(items) = current.get("items") {
1096            if items.is_object() {
1097                current = items;
1098                continue;
1099            }
1100        }
1101        return None;
1102    }
1103    Some(current.clone())
1104}
1105
/// Round 17.5 — one OWASP injection probe to send.
///
/// Produced by `build_owasp_probes`; each value carries everything that
/// differs from the positive request for one payload category.
#[derive(Debug, Clone)]
struct OwaspProbe {
    /// Self-test label; `build_owasp_probes` sets it to `owasp:<category>`.
    label: String,
    /// Request body to send with the probe, if any.
    body: Option<String>,
    /// Query parameters to send with the probe.
    query: Vec<(String, String)>,
}
1113
1114/// Build one OWASP probe per `SecurityCategory` for `op`. Targets the
1115/// first query param if any, else the first string field of the
1116/// positive JSON body. Returns empty if neither target is available.
1117fn build_owasp_probes(op: &AnnotatedOperation) -> Vec<OwaspProbe> {
1118    use crate::security_payloads::{SecurityCategory, SecurityPayloads};
1119
1120    let categories = [
1121        SecurityCategory::SqlInjection,
1122        SecurityCategory::Xss,
1123        SecurityCategory::CommandInjection,
1124        SecurityCategory::PathTraversal,
1125        SecurityCategory::Ssti,
1126        SecurityCategory::LdapInjection,
1127        SecurityCategory::Xxe,
1128    ];
1129
1130    // Pick an injection target ONCE per operation; reuse it across
1131    // categories. (A single op gets up to 7 probes — one per category
1132    // — all attacking the same field.)
1133    let injection_target = pick_injection_target(op);
1134    let Some(target) = injection_target else {
1135        return Vec::new();
1136    };
1137
1138    let mut probes = Vec::new();
1139    for cat in categories {
1140        // Take the *first* payload from each category. The
1141        // collection's first entry is the canonical low-risk
1142        // representative; later entries include time-based / blind
1143        // probes that aren't useful as a one-shot rejection test.
1144        let Some(payload) = SecurityPayloads::get_by_category(cat).into_iter().next() else {
1145            continue;
1146        };
1147        let mut query = op.query_params.clone();
1148        let mut body = op.sample_body.clone();
1149        match &target {
1150            InjectionTarget::Query(idx) => {
1151                if let Some(slot) = query.get_mut(*idx) {
1152                    slot.1 = payload.payload.clone();
1153                }
1154            }
1155            InjectionTarget::BodyStringField(field) => {
1156                body = inject_into_body_field(body.as_deref(), field, &payload.payload);
1157            }
1158        }
1159        probes.push(OwaspProbe {
1160            label: format!("owasp:{}", cat),
1161            body,
1162            query,
1163        });
1164    }
1165    probes
1166}
1167
/// Where an OWASP probe places its payload for a given operation.
/// Chosen by `pick_injection_target` and consumed by `build_owasp_probes`.
#[derive(Debug, Clone)]
enum InjectionTarget {
    /// Index into the operation's `query_params`; that entry's value is
    /// replaced with the payload.
    Query(usize),
    /// Name of a top-level field of the JSON sample body; its value is
    /// overwritten with the payload.
    BodyStringField(String),
}
1173
1174fn pick_injection_target(op: &AnnotatedOperation) -> Option<InjectionTarget> {
1175    if !op.query_params.is_empty() {
1176        return Some(InjectionTarget::Query(0));
1177    }
1178    let sample = op.sample_body.as_deref()?;
1179    let parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
1180    let obj = parsed.as_object()?;
1181    for (k, v) in obj {
1182        if v.is_string() {
1183            return Some(InjectionTarget::BodyStringField(k.clone()));
1184        }
1185    }
1186    None
1187}
1188
1189/// Replace the value of `field` in a JSON-object body with `payload`.
1190/// Returns the mutated body as a JSON string. Returns `None` if the
1191/// body doesn't parse as a JSON object.
1192fn inject_into_body_field(body: Option<&str>, field: &str, payload: &str) -> Option<String> {
1193    let raw = body?;
1194    let mut parsed: serde_json::Value = serde_json::from_str(raw).ok()?;
1195    let obj = parsed.as_object_mut()?;
1196    obj.insert(field.to_string(), serde_json::json!(payload));
1197    serde_json::to_string(&parsed).ok()
1198}
1199
/// Round 17.3 — one synthesised bad credential to send.
///
/// Built by `build_security_probes`. A probe whose `headers` and `query`
/// are both empty is the "no auth at all" case.
#[derive(Debug, Clone)]
struct SecurityProbe {
    /// Self-test label, e.g. `security:bad-bearer`.
    label: String,
    /// Headers to attach to the probe request.
    headers: Vec<(String, String)>,
    /// Query parameters to attach (API key in query case).
    query: Vec<(String, String)>,
}
1211
1212/// For each declared security scheme, produce one bad-credential
1213/// probe plus a single "no auth at all" probe that exercises the
1214/// missing-credential code path. Deduplicates by scheme kind so an
1215/// operation declaring `[bearer, bearer]` only yields one Bearer
1216/// probe.
1217fn build_security_probes(schemes: &[SecuritySchemeInfo]) -> Vec<SecurityProbe> {
1218    if schemes.is_empty() {
1219        return Vec::new();
1220    }
1221    let mut probes: Vec<SecurityProbe> = Vec::new();
1222    let mut seen_bearer = false;
1223    let mut seen_basic = false;
1224    // `(loc_tag, name)` — ApiKeyLocation doesn't implement Ord, so
1225    // we tag it with a short discriminant string for dedup.
1226    let mut seen_apikey: std::collections::BTreeSet<(&'static str, String)> = Default::default();
1227    for s in schemes {
1228        match s {
1229            SecuritySchemeInfo::Bearer if !seen_bearer => {
1230                seen_bearer = true;
1231                probes.push(SecurityProbe {
1232                    label: "security:bad-bearer".into(),
1233                    headers: vec![(
1234                        "Authorization".into(),
1235                        "Bearer self-test-invalid-token".into(),
1236                    )],
1237                    query: Vec::new(),
1238                });
1239            }
1240            SecuritySchemeInfo::Basic if !seen_basic => {
1241                seen_basic = true;
1242                // base64("self-test:invalid") — valid base64, wrong creds.
1243                probes.push(SecurityProbe {
1244                    label: "security:bad-basic".into(),
1245                    headers: vec![(
1246                        "Authorization".into(),
1247                        "Basic c2VsZi10ZXN0OmludmFsaWQ=".into(),
1248                    )],
1249                    query: Vec::new(),
1250                });
1251            }
1252            SecuritySchemeInfo::ApiKey { location, name } => {
1253                let loc_tag = match location {
1254                    ApiKeyLocation::Header => "header",
1255                    ApiKeyLocation::Query => "query",
1256                    ApiKeyLocation::Cookie => "cookie",
1257                };
1258                if seen_apikey.contains(&(loc_tag, name.clone())) {
1259                    continue;
1260                }
1261                seen_apikey.insert((loc_tag, name.clone()));
1262                let label = format!("security:bad-apikey:{}", name);
1263                let bad = "self-test-invalid-key".to_string();
1264                match location {
1265                    ApiKeyLocation::Header => probes.push(SecurityProbe {
1266                        label,
1267                        headers: vec![(name.clone(), bad)],
1268                        query: Vec::new(),
1269                    }),
1270                    ApiKeyLocation::Query => probes.push(SecurityProbe {
1271                        label,
1272                        headers: Vec::new(),
1273                        query: vec![(name.clone(), bad)],
1274                    }),
1275                    ApiKeyLocation::Cookie => probes.push(SecurityProbe {
1276                        label,
1277                        headers: vec![("Cookie".into(), format!("{}={}", name, bad))],
1278                        query: Vec::new(),
1279                    }),
1280                }
1281            }
1282            _ => {}
1283        }
1284    }
1285    // Always add a "no auth at all" probe when *any* security scheme
1286    // is declared — useful even if all schemes failed to resolve to a
1287    // testable kind, because it surfaces validators that aren't
1288    // checking auth presence at all.
1289    probes.push(SecurityProbe {
1290        label: "security:no-auth".into(),
1291        headers: Vec::new(),
1292        query: Vec::new(),
1293    });
1294    probes
1295}
1296
1297/// Remove Authorization and any API-key headers declared by the
1298/// operation's security schemes from `headers`, so a security probe
1299/// can supply its own credential (or none) cleanly.
1300fn strip_auth(
1301    headers: &[(String, String)],
1302    schemes: &[SecuritySchemeInfo],
1303) -> Vec<(String, String)> {
1304    let mut apikey_headers: std::collections::BTreeSet<String> = Default::default();
1305    for s in schemes {
1306        if let SecuritySchemeInfo::ApiKey {
1307            location: ApiKeyLocation::Header,
1308            name,
1309        } = s
1310        {
1311            apikey_headers.insert(name.to_lowercase());
1312        }
1313        if let SecuritySchemeInfo::ApiKey {
1314            location: ApiKeyLocation::Cookie,
1315            ..
1316        } = s
1317        {
1318            apikey_headers.insert("cookie".into());
1319        }
1320    }
1321    headers
1322        .iter()
1323        .filter(|(k, _)| {
1324            let lk = k.to_lowercase();
1325            lk != "authorization" && !apikey_headers.contains(&lk)
1326        })
1327        .cloned()
1328        .collect()
1329}
1330
1331/// Remove API-key query parameters declared by the operation's
1332/// security schemes from `query`, so a probe can supply its own.
1333fn strip_auth_query(
1334    query: &[(String, String)],
1335    schemes: &[SecuritySchemeInfo],
1336) -> Vec<(String, String)> {
1337    let mut apikey_query: std::collections::BTreeSet<String> = Default::default();
1338    for s in schemes {
1339        if let SecuritySchemeInfo::ApiKey {
1340            location: ApiKeyLocation::Query,
1341            name,
1342        } = s
1343        {
1344            apikey_query.insert(name.clone());
1345        }
1346    }
1347    query.iter().filter(|(k, _)| !apikey_query.contains(k)).cloned().collect()
1348}
1349
1350/// Variant of `send_case` that takes an explicit `extra_headers`
1351/// (rather than reading them from `config`). Used by security probes
1352/// to substitute or strip the configured Authorization header.
1353#[allow(clippy::too_many_arguments)]
1354async fn send_case_with_extra(
1355    client: &Client,
1356    config: &SelfTestConfig,
1357    method: Method,
1358    url: &str,
1359    label: &str,
1360    expected_4xx: bool,
1361    body: Option<&str>,
1362    query: Vec<(String, String)>,
1363    headers: Vec<(String, String)>,
1364    extra_headers: Vec<(String, String)>,
1365) -> CaseOutcome {
1366    let mut req = client.request(method.clone(), url);
1367    let mut capture_headers: BTreeMap<String, String> = BTreeMap::new();
1368    for (k, v) in &query {
1369        req = req.query(&[(k.as_str(), v.as_str())]);
1370    }
1371    // Round 28 — reqwest's `.header(k, v)` APPENDS rather than replaces
1372    // (.headers().insert() would replace but isn't on the builder).
1373    // The previous round-25 fix relied on "last-write-wins" semantics
1374    // that don't exist; for content-type-swap probes the request went
1375    // out with BOTH `Content-Type: application/json` AND `Content-Type:
1376    // application/xml`, and axum's `Json<>` extractor picked the JSON
1377    // one and accepted, so the server-side validator never saw the
1378    // mismatch. Build a `HeaderMap` ourselves so the override
1379    // replaces the body-block default exactly once.
1380    let mut final_headers: reqwest::header::HeaderMap = reqwest::header::HeaderMap::new();
1381    if let Some(_b) = body {
1382        if let Ok(v) = reqwest::header::HeaderValue::from_str("application/json") {
1383            final_headers.insert(reqwest::header::CONTENT_TYPE, v);
1384        }
1385        capture_headers.insert("Content-Type".to_string(), "application/json".to_string());
1386    }
1387    for (k, v) in &headers {
1388        if let (Ok(hn), Ok(hv)) = (
1389            reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1390            reqwest::header::HeaderValue::from_str(v),
1391        ) {
1392            final_headers.insert(hn, hv);
1393        }
1394        capture_headers.insert(k.clone(), v.clone());
1395    }
1396    for (k, v) in &extra_headers {
1397        if let (Ok(hn), Ok(hv)) = (
1398            reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1399            reqwest::header::HeaderValue::from_str(v),
1400        ) {
1401            final_headers.insert(hn, hv);
1402        }
1403        capture_headers.insert(k.clone(), v.clone());
1404    }
1405    if let Some(b) = body {
1406        req = req.body(b.to_string());
1407    }
1408    req = req.headers(final_headers);
1409    let (actual_status, response_capture) = match req.send().await {
1410        Ok(resp) => {
1411            let status = resp.status().as_u16();
1412            if let Some(sink) = &config.capture {
1413                let resp_headers: BTreeMap<String, String> = resp
1414                    .headers()
1415                    .iter()
1416                    .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string()))
1417                    .collect();
1418                let text = resp.text().await.unwrap_or_default();
1419                let (rb, truncated) = truncate_body_for_capture(&text);
1420                (status, Some((Some((rb, truncated)), resp_headers, None, sink.clone())))
1421            } else {
1422                (status, None)
1423            }
1424        }
1425        Err(e) => {
1426            let err_str = e.to_string();
1427            if let Some(sink) = &config.capture {
1428                (0, Some((None, BTreeMap::new(), Some(err_str), sink.clone())))
1429            } else {
1430                (0, None)
1431            }
1432        }
1433    };
1434    let passed = if expected_4xx {
1435        (400..500).contains(&actual_status)
1436    } else {
1437        (200..400).contains(&actual_status)
1438    };
1439    if let Some((resp_body, resp_headers, error, sink)) = response_capture {
1440        let (request_body, request_body_truncated) = match body {
1441            Some(b) => {
1442                let (rb, t) = truncate_body_for_capture(b);
1443                (Some(rb), t)
1444            }
1445            None => (None, false),
1446        };
1447        let (response_body, response_body_truncated) = match resp_body {
1448            Some((rb, t)) => (Some(rb), t),
1449            None => (None, false),
1450        };
1451        let entry = CaseCapture {
1452            label: label.to_string(),
1453            method: method.to_string(),
1454            url: build_query_url(url, &query),
1455            request_headers: capture_headers,
1456            request_body,
1457            request_body_truncated,
1458            response_status: actual_status,
1459            response_headers: resp_headers,
1460            response_body,
1461            response_body_truncated,
1462            error,
1463            // Filled in by the per-operation validation pass after
1464            // every probe finishes; the capture itself is unaware of
1465            // the schema map.
1466            response_schema_error: None,
1467            // Round 28 — derive the expected range from the probe's
1468            // `expected_4xx` flag so the JSONL line and HTML viewer
1469            // can show mismatches without re-deriving on the read side.
1470            expected_status_range: if expected_4xx {
1471                "4xx".into()
1472            } else {
1473                "2xx-3xx".into()
1474            },
1475        };
1476        if let Ok(mut guard) = sink.lock() {
1477            guard.push(entry);
1478        }
1479    }
1480    CaseOutcome {
1481        label: label.to_string(),
1482        expected_4xx,
1483        actual_status,
1484        passed,
1485    }
1486}
1487
1488// HTTP request shape needs all of these: client, config (for capture
1489// sink + extra headers), method, url, label (probe id), expected_4xx
1490// (pass/fail decision), body, query, headers. A struct wrapper would
1491// just move the arity from positional to field access without making
1492// the call sites clearer.
1493#[allow(clippy::too_many_arguments)]
1494async fn send_case(
1495    client: &Client,
1496    config: &SelfTestConfig,
1497    method: Method,
1498    url: &str,
1499    label: &str,
1500    expected_4xx: bool,
1501    body: Option<&str>,
1502    query: Vec<(String, String)>,
1503    headers: Vec<(String, String)>,
1504) -> CaseOutcome {
1505    // Forwarding to `send_case_with_extra` keeps the capture logic in
1506    // one place so request/response tracing can't drift between the
1507    // two entrypoints.
1508    send_case_with_extra(
1509        client,
1510        config,
1511        method,
1512        url,
1513        label,
1514        expected_4xx,
1515        body,
1516        query,
1517        headers,
1518        config.extra_headers.clone(),
1519    )
1520    .await
1521}
1522
1523/// Round 23 (c-iii) — rebuild the query-stringified URL for capture so
1524/// the JSONL trace shows the URL that actually went over the wire
1525/// (reqwest applies `.query(..)` after the request URL string is
1526/// rendered, so capturing the raw `url` argument alone loses the
1527/// query params).
1528fn build_query_url(base: &str, query: &[(String, String)]) -> String {
1529    if query.is_empty() {
1530        return base.to_string();
1531    }
1532    let qs: String = query
1533        .iter()
1534        .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
1535        .collect::<Vec<_>>()
1536        .join("&");
1537    if base.contains('?') {
1538        format!("{base}&{qs}")
1539    } else {
1540        format!("{base}?{qs}")
1541    }
1542}
1543
1544/// Substitute `{param}` placeholders in the spec path with their
1545/// sample values from `path_params`, then prepend `target_url`. Empty
1546/// values are kept as `{param}` so an upstream router still matches
1547/// the template — useful when `path_params` is empty and we want to
1548/// hit the same route the spec defines.
1549///
1550/// All current call sites went through `build_url_with_base` after
1551/// round 18.1, so this no-base-path helper is unused; keep it as the
1552/// documented shim for future external callers (one-arg simplification).
1553#[allow(dead_code)]
1554fn build_url(target: &str, path_template: &str, path_params: &[(String, String)]) -> String {
1555    build_url_with_base(target, None, path_template, path_params)
1556}
1557
/// Round 18.1 — variant of `build_url` that takes a `base_path`
/// (e.g. `Some("/api")`). When set, it is inserted between `target`
/// and the spec path, so a spec declaring `/users` against a target
/// served behind `/api` resolves to `<target>/api/users`.
///
/// * `target` — scheme + authority of the server; trailing `/` is
///   stripped so joining never produces `//`.
/// * `base_path` — optional path prefix. It is normalised: surrounding
///   `/` are stripped and exactly one leading `/` is added. `None`,
///   `""` and slash-only values such as `"/"` all mean "no prefix".
///   (Previously `"/"` produced `<target>//path`.)
/// * `path_template` — spec path with `{param}` placeholders; a
///   leading `/` is added when missing.
/// * `path_params` — `(name, sample)` pairs. Pairs with an empty
///   sample are skipped so the `{name}` placeholder stays in the URL.
///
/// Returns `<target><prefix><path>`. Sample values are inserted
/// verbatim (no percent-encoding).
fn build_url_with_base(
    target: &str,
    base_path: Option<&str>,
    path_template: &str,
    path_params: &[(String, String)],
) -> String {
    let mut path = path_template.to_string();
    for (name, value) in path_params {
        // An empty sample would erase the path segment; keep the
        // placeholder instead so the route template is still hit.
        if value.is_empty() {
            continue;
        }
        path = path.replace(&format!("{{{name}}}"), value);
    }
    if !path.starts_with('/') {
        path.insert(0, '/');
    }

    let target = target.trim_end_matches('/');
    // Trim first, THEN decide whether there is a prefix at all:
    // checking emptiness before trimming let `"/"` through and turned
    // it into a bare `/` prefix (→ double slash in the final URL).
    let prefix = base_path
        .map(|bp| bp.trim_matches('/'))
        .filter(|bp| !bp.is_empty())
        .map(|bp| format!("/{bp}"))
        .unwrap_or_default();

    format!("{target}{prefix}{path}")
}
1595
1596#[cfg(test)]
1597mod tests {
1598    use super::*;
1599
1600    fn op(
1601        method: &str,
1602        path: &str,
1603        body: Option<&str>,
1604        query: Vec<(&str, &str)>,
1605        headers: Vec<(&str, &str)>,
1606        path_params: Vec<(&str, &str)>,
1607    ) -> AnnotatedOperation {
1608        AnnotatedOperation {
1609            method: method.into(),
1610            path: path.into(),
1611            features: Vec::new(),
1612            request_body_content_type: body.map(|_| "application/json".into()),
1613            sample_body: body.map(|s| s.to_string()),
1614            query_params: query.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1615            header_params: headers.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1616            path_params: path_params.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1617            response_schema: None,
1618            response_schemas: std::collections::BTreeMap::new(),
1619            request_body_schema: None,
1620            security_schemes: Vec::new(),
1621        }
1622    }
1623
1624    #[test]
1625    fn build_url_substitutes_path_params() {
1626        let url = build_url(
1627            "https://api.test/",
1628            "/users/{id}/posts/{pid}",
1629            &[("id".into(), "42".into()), ("pid".into(), "7".into())],
1630        );
1631        assert_eq!(url, "https://api.test/users/42/posts/7");
1632    }
1633
1634    /// Round 18.1 — a run where every positive 404s should be flagged
1635    /// as a likely target misconfiguration, not silently treated as a
1636    /// successful conformance run.
1637    #[test]
1638    fn detect_target_misconfiguration_when_all_positives_share_status() {
1639        let mut report = SelfTestReport {
1640            positive_pass: 0,
1641            positive_fail: 50,
1642            ..Default::default()
1643        };
1644        for i in 0..50 {
1645            report.operations.push(OperationResult {
1646                method: "GET".into(),
1647                path: format!("/r/{i}"),
1648                positive: Some(CaseOutcome {
1649                    label: "positive".into(),
1650                    expected_4xx: false,
1651                    actual_status: 404,
1652                    passed: false,
1653                }),
1654                negatives: Vec::new(),
1655            });
1656        }
1657        assert_eq!(report.detect_target_misconfiguration(), Some(404));
1658    }
1659
1660    #[test]
1661    fn detect_target_misconfiguration_returns_none_when_some_pass() {
1662        let mut report = SelfTestReport {
1663            positive_pass: 5,
1664            positive_fail: 50,
1665            ..Default::default()
1666        };
1667        for i in 0..55 {
1668            report.operations.push(OperationResult {
1669                method: "GET".into(),
1670                path: format!("/r/{i}"),
1671                positive: Some(CaseOutcome {
1672                    label: "positive".into(),
1673                    expected_4xx: false,
1674                    actual_status: if i < 5 { 200 } else { 404 },
1675                    passed: i < 5,
1676                }),
1677                negatives: Vec::new(),
1678            });
1679        }
1680        assert_eq!(report.detect_target_misconfiguration(), None);
1681    }
1682
1683    /// Round 18.1 — `--base-path /api` should prepend `/api` to
1684    /// every spec path. Pre-fix, the self-test ignored base_path and
1685    /// 404'd every positive when the deployed API was behind a path
1686    /// prefix.
1687    #[test]
1688    fn build_url_applies_base_path_when_present() {
1689        let url = build_url_with_base(
1690            "https://api.example.com",
1691            Some("/api"),
1692            "/users/{id}",
1693            &[("id".into(), "42".into())],
1694        );
1695        assert_eq!(url, "https://api.example.com/api/users/42");
1696    }
1697
1698    /// Round 18.1 — base_path is normalised: missing leading slash
1699    /// gets one added, trailing slash is stripped, empty string is
1700    /// the same as None.
1701    #[test]
1702    fn build_url_normalises_base_path() {
1703        let no_slash = build_url_with_base("https://t", Some("api"), "/x", &[]);
1704        assert_eq!(no_slash, "https://t/api/x");
1705        let trailing = build_url_with_base("https://t", Some("/api/"), "/x", &[]);
1706        assert_eq!(trailing, "https://t/api/x");
1707        let empty = build_url_with_base("https://t", Some(""), "/x", &[]);
1708        assert_eq!(empty, "https://t/x");
1709        let none = build_url_with_base("https://t", None, "/x", &[]);
1710        assert_eq!(none, "https://t/x");
1711    }
1712
1713    #[test]
1714    fn build_url_keeps_placeholders_when_no_sample() {
1715        let url = build_url("https://api.test", "/users/{id}", &[]);
1716        assert_eq!(url, "https://api.test/users/{id}");
1717    }
1718
1719    #[test]
1720    fn report_summary_calls_out_misses() {
1721        let r = SelfTestReport {
1722            positive_pass: 3,
1723            positive_fail: 0,
1724            negative_caught: BTreeMap::from([("request-body".into(), 2)]),
1725            negative_missed: BTreeMap::from([("request-body".into(), 1)]),
1726            operations: Vec::new(),
1727        };
1728        let summary = r.render_summary();
1729        assert!(summary.contains("Positives: 3 pass / 0 fail"));
1730        assert!(summary.contains("Negatives [request-body]: 2 caught / 1 missed"));
1731        assert!(summary.contains("⚠"));
1732        assert!(!r.all_passed());
1733    }
1734
1735    #[test]
1736    fn report_all_passed_when_no_miss() {
1737        let r = SelfTestReport {
1738            positive_pass: 5,
1739            positive_fail: 0,
1740            negative_caught: BTreeMap::from([("parameters".into(), 3)]),
1741            negative_missed: BTreeMap::new(),
1742            operations: Vec::new(),
1743        };
1744        assert!(r.all_passed());
1745        assert!(r.render_summary().contains("✓"));
1746    }
1747
1748    #[tokio::test]
1749    async fn run_self_test_against_unreachable_target_marks_all_failed() {
1750        // Use an obviously-dead port so we exercise the timeout/error
1751        // path without needing a live server in tests.
1752        let cfg = SelfTestConfig {
1753            target_url: "http://127.0.0.1:1".into(),
1754            timeout: Duration::from_millis(200),
1755            ..Default::default()
1756        };
1757        let ops = vec![op(
1758            "POST",
1759            "/users",
1760            Some("{\"name\":\"a\"}"),
1761            vec![],
1762            vec![],
1763            vec![],
1764        )];
1765        let report = run_self_test(&ops, &cfg).await.expect("client builds");
1766        // All cases hit the connect-error path → actual_status=0.
1767        // Positive expects 2xx-3xx → 0 is fail. Negatives expect 4xx
1768        // → 0 is also fail (we missed catching).
1769        assert_eq!(report.positive_fail, 1);
1770        assert!(report.negative_missed.values().sum::<usize>() >= 1);
1771        assert!(!report.all_passed());
1772    }
1773
1774    /// Round 17.2 — operations with both a positive sample AND a
1775    /// resolved request-body schema produce schema-driven negatives
1776    /// in addition to the spec-agnostic empty/wrong-type ones. The
1777    /// labels carry the field path so a per-category report can tell
1778    /// you exactly which field caught.
1779    #[tokio::test]
1780    async fn schema_driven_negatives_fire_when_schema_present() {
1781        use openapiv3::{ObjectType, ReferenceOr, Schema, SchemaData, SchemaKind, Type};
1782        let cfg = SelfTestConfig {
1783            target_url: "http://127.0.0.1:1".into(),
1784            timeout: Duration::from_millis(200),
1785            ..Default::default()
1786        };
1787        // Build an operation whose schema has a required `name` string
1788        // and an `age` integer. The mutator should produce, at
1789        // minimum: required-removed:name, required-removed:age,
1790        // type-mismatch:name, type-mismatch:age, integer-as-float:age,
1791        // plus the root-level type-mismatch.
1792        let mut obj = ObjectType::default();
1793        obj.properties.insert(
1794            "name".to_string(),
1795            ReferenceOr::Item(Box::new(Schema {
1796                schema_data: SchemaData::default(),
1797                schema_kind: SchemaKind::Type(Type::String(Default::default())),
1798            })),
1799        );
1800        obj.properties.insert(
1801            "age".to_string(),
1802            ReferenceOr::Item(Box::new(Schema {
1803                schema_data: SchemaData::default(),
1804                schema_kind: SchemaKind::Type(Type::Integer(Default::default())),
1805            })),
1806        );
1807        obj.required = vec!["name".into(), "age".into()];
1808        let schema = Schema {
1809            schema_data: SchemaData::default(),
1810            schema_kind: SchemaKind::Type(Type::Object(obj)),
1811        };
1812
1813        let mut o =
1814            op("POST", "/users", Some(r#"{"name":"Ada","age":30}"#), vec![], vec![], vec![]);
1815        o.request_body_schema = Some(schema);
1816        let report = run_self_test(&[o], &cfg).await.expect("client builds");
1817        // Bucket labels from the operation result.
1818        let labels: std::collections::BTreeSet<String> = report
1819            .operations
1820            .iter()
1821            .flat_map(|op| op.negatives.iter().map(|n| n.label.clone()))
1822            .collect();
1823        assert!(
1824            labels.iter().any(|l| l.starts_with("request-body:type-mismatch:")),
1825            "missing type-mismatch negative; got {labels:?}"
1826        );
1827        assert!(
1828            labels.iter().any(|l| l.starts_with("request-body:required-removed:")),
1829            "missing required-removed negative; got {labels:?}"
1830        );
1831        assert!(
1832            labels.iter().any(|l| l == "parameters:uri-too-long"),
1833            "missing URI-length negative; got {labels:?}"
1834        );
1835    }
1836
1837    /// Round 16 — operations with a body OR a path-param now produce
1838    /// negatives even without a sample body. Previously a POST whose
1839    /// body annotator failed produced *zero* negatives, so the self-test
1840    /// always reported "all passing" for that endpoint.
1841    #[tokio::test]
1842    async fn no_sample_body_still_produces_request_body_negatives() {
1843        let cfg = SelfTestConfig {
1844            target_url: "http://127.0.0.1:1".into(),
1845            timeout: Duration::from_millis(200),
1846            ..Default::default()
1847        };
1848        // POST with a body content type but no sample (annotator gap).
1849        let ops = vec![op("POST", "/x", None, vec![], vec![], vec![])];
1850        // No sample_body but request_body_content_type set:
1851        let mut ops_fixed = ops;
1852        ops_fixed[0].request_body_content_type = Some("application/json".into());
1853        let report = run_self_test(&ops_fixed, &cfg).await.expect("client builds");
1854        // Both request-body negatives (empty + wrong-type) should fire,
1855        // landing in `negative_missed` because the unreachable target
1856        // returns no 4xx. The point: count > 0.
1857        assert!(
1858            report.negative_missed.values().sum::<usize>() >= 2,
1859            "expected ≥2 request-body negatives, got {:?}",
1860            report.negative_missed
1861        );
1862    }
1863
1864    /// Round 16 — operations with a path-param now get a probe even
1865    /// when there's no body / required query / required header.
1866    /// Previously `/teams/{team-id}` with no other required fields
1867    /// produced zero negatives → always "all passing".
1868    #[tokio::test]
1869    async fn path_param_only_endpoint_produces_a_probe() {
1870        let cfg = SelfTestConfig {
1871            target_url: "http://127.0.0.1:1".into(),
1872            timeout: Duration::from_millis(200),
1873            ..Default::default()
1874        };
1875        let ops = vec![op(
1876            "GET",
1877            "/teams/{team-id}",
1878            None,
1879            vec![],
1880            vec![],
1881            vec![("team-id", "1")],
1882        )];
1883        let report = run_self_test(&ops, &cfg).await.expect("client builds");
1884        let total: usize = report.negative_caught.values().sum::<usize>()
1885            + report.negative_missed.values().sum::<usize>();
1886        assert!(total >= 1, "expected ≥1 path-param probe, got {:?}", report);
1887    }
1888
1889    /// Round 18.5 — when `geo_ip` is set, every default forwarded-
1890    /// IP header gets the IP appended (X-Forwarded-For,
1891    /// True-Client-IP, CF-Connecting-IP).
1892    #[test]
1893    fn effective_op_headers_appends_geo_ip_to_default_headers() {
1894        let ip: IpAddr = "203.0.113.42".parse().unwrap();
1895        let headers = effective_op_headers(
1896            &[("Accept".into(), "application/json".into())],
1897            Some(ip),
1898            &default_geo_source_headers(),
1899        );
1900        let names: Vec<&str> = headers.iter().map(|(k, _)| k.as_str()).collect();
1901        assert!(names.contains(&"Accept"));
1902        assert!(names.contains(&"X-Forwarded-For"));
1903        assert!(names.contains(&"True-Client-IP"));
1904        assert!(names.contains(&"CF-Connecting-IP"));
1905        // Every geo header carries the same IP value.
1906        let geo_values: Vec<&str> =
1907            headers.iter().filter(|(k, _)| k != "Accept").map(|(_, v)| v.as_str()).collect();
1908        for v in geo_values {
1909            assert_eq!(v, "203.0.113.42");
1910        }
1911    }
1912
1913    /// Round 18.5 — operations that already declare a forwarded-IP
1914    /// header (rare but legal — some specs hard-code one) keep their
1915    /// declared value; we don't clobber the spec.
1916    #[test]
1917    fn effective_op_headers_respects_spec_declared_header() {
1918        let ip: IpAddr = "203.0.113.99".parse().unwrap();
1919        let headers = effective_op_headers(
1920            &[("x-forwarded-for".into(), "10.0.0.1".into())],
1921            Some(ip),
1922            &["X-Forwarded-For".to_string()],
1923        );
1924        // The spec's lower-case value wins; we shouldn't add a
1925        // second X-Forwarded-For row that overrides it.
1926        let xff: Vec<&str> = headers
1927            .iter()
1928            .filter(|(k, _)| k.eq_ignore_ascii_case("x-forwarded-for"))
1929            .map(|(_, v)| v.as_str())
1930            .collect();
1931        assert_eq!(xff, vec!["10.0.0.1"]);
1932    }
1933
1934    /// Round 18.5 — None geo_ip and/or empty header list is a no-op.
1935    #[test]
1936    fn effective_op_headers_is_a_noop_without_geo_ip() {
1937        let base = vec![("Accept".into(), "json".into())];
1938        let h1 = effective_op_headers(&base, None, &default_geo_source_headers());
1939        assert_eq!(h1, base);
1940        let ip: IpAddr = "10.0.0.1".parse().unwrap();
1941        let h2 = effective_op_headers(&base, Some(ip), &[]);
1942        assert_eq!(h2, base);
1943    }
1944
1945    /// Round 18.5 — empty `source_ips` builds a single default
1946    /// client; a non-empty list builds N clients each attempting to
1947    /// bind. We can't reliably test the actual bind on CI (no
1948    /// loopback aliases), but a loopback IP is always bind-able.
1949    #[test]
1950    fn build_client_pool_one_per_source_ip() {
1951        let mut cfg = SelfTestConfig {
1952            target_url: "http://127.0.0.1:1".into(),
1953            timeout: Duration::from_millis(200),
1954            ..Default::default()
1955        };
1956        // Empty → one default client.
1957        assert_eq!(build_client_pool(&cfg).expect("default builds").len(), 1);
1958        // Non-empty → one per IP. Loopback bind is portable.
1959        cfg.source_ips = vec!["127.0.0.1".parse().unwrap()];
1960        assert_eq!(build_client_pool(&cfg).expect("bind loopback").len(), 1);
1961    }
1962
1963    /// Round 18.5 — geo IPs round-robin across operations. Hits an
1964    /// unreachable target so we can inspect the case outcomes; the
1965    /// point is to confirm `op_headers` carried the geo IP through
1966    /// (CaseOutcome doesn't surface headers directly, so we just
1967    /// verify the run completes without panicking and the result
1968    /// shape is correct when source_ips is non-empty too).
1969    #[tokio::test]
1970    async fn run_self_test_with_geo_source_completes() {
1971        let cfg = SelfTestConfig {
1972            target_url: "http://127.0.0.1:1".into(),
1973            timeout: Duration::from_millis(200),
1974            geo_source_ips: vec![
1975                "203.0.113.1".parse().unwrap(),
1976                "203.0.113.2".parse().unwrap(),
1977            ],
1978            ..Default::default()
1979        };
1980        let ops = vec![
1981            op("GET", "/a", None, vec![], vec![], vec![]),
1982            op("GET", "/b", None, vec![], vec![], vec![]),
1983            op("GET", "/c", None, vec![], vec![], vec![]),
1984        ];
1985        let report = run_self_test(&ops, &cfg).await.expect("client builds");
1986        assert_eq!(report.operations.len(), 3);
1987    }
1988
1989    /// Round 24 (f) — Srikanth saw the geo header on positive probes
1990    /// only; the four negative-probe call sites were passing
1991    /// `op.header_params` directly instead of `op_headers`, so the
1992    /// geo IP got dropped. This test runs a self-test that includes
1993    /// negative probes (uri-too-long, missing-query, etc.) under
1994    /// `--conformance-self-test-capture`, then asserts that EVERY
1995    /// captured probe (positive AND negative) carries one of the
1996    /// configured forwarded-IP headers.
1997    #[tokio::test]
1998    async fn geo_headers_present_on_every_probe_with_capture() {
1999        let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2000        let cfg = SelfTestConfig {
2001            target_url: "http://127.0.0.1:1".into(),
2002            timeout: Duration::from_millis(50),
2003            geo_source_ips: vec!["203.0.113.5".parse().unwrap()],
2004            capture: Some(sink.clone()),
2005            ..Default::default()
2006        };
2007        // An operation rich enough to trip several negative-probe
2008        // branches: header param (→ missing-header), query param
2009        // (→ missing-query), and a sample body (→ schema mutations
2010        // wouldn't fire without a schema, but uri-too-long always
2011        // does).
2012        let ops = vec![op(
2013            "GET",
2014            "/items",
2015            Some("{}"),
2016            vec![("id", "1")],
2017            vec![("X-Trace", "x")],
2018            vec![],
2019        )];
2020        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2021        let captures = sink.lock().unwrap();
2022        assert!(!captures.is_empty(), "self-test should record probes");
2023        // For every captured probe, at least one of the default geo
2024        // headers must be present and equal to the configured IP.
2025        let geo_headers: std::collections::HashSet<&str> =
2026            ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"].into_iter().collect();
2027        for c in captures.iter() {
2028            let has_geo = c
2029                .request_headers
2030                .iter()
2031                .any(|(k, v)| geo_headers.contains(k.as_str()) && v == "203.0.113.5");
2032            assert!(
2033                has_geo,
2034                "probe `{}` is missing the geo IP header; got headers: {:?}",
2035                c.label, c.request_headers
2036            );
2037        }
2038    }
2039
2040    /// Round 25 (k) — operations with a JSON request body now get four
2041    /// content-type-swap probes (xml / yaml / multipart / urlencoded).
2042    /// Verify they:
2043    ///   1. fire only when the operation declares a JSON body
2044    ///   2. carry the wrong Content-Type the probe is testing for
2045    ///   3. don't fire on body-less operations
2046    #[tokio::test]
2047    async fn content_type_swap_probes_fire_for_json_bodies() {
2048        let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2049        let cfg = SelfTestConfig {
2050            target_url: "http://127.0.0.1:1".into(),
2051            timeout: Duration::from_millis(50),
2052            capture: Some(sink.clone()),
2053            ..Default::default()
2054        };
2055        let ops = vec![
2056            op("POST", "/users", Some("{\"name\":\"a\"}"), vec![], vec![], vec![]),
2057            op("GET", "/ping", None, vec![], vec![], vec![]),
2058        ];
2059        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2060        let captures = sink.lock().unwrap();
2061
2062        let swap_labels: Vec<&str> = captures
2063            .iter()
2064            .filter(|c| c.label.starts_with("request-body:content-type-mismatch:"))
2065            .map(|c| c.label.as_str())
2066            .collect();
2067        assert_eq!(
2068            swap_labels.len(),
2069            4,
2070            "expected 4 content-type-swap probes (one per variant), got: {swap_labels:?}"
2071        );
2072        let expected_labels = [
2073            "request-body:content-type-mismatch:xml",
2074            "request-body:content-type-mismatch:yaml",
2075            "request-body:content-type-mismatch:multipart",
2076            "request-body:content-type-mismatch:urlencoded",
2077        ];
2078        for want in expected_labels {
2079            assert!(swap_labels.contains(&want), "missing swap probe `{want}`");
2080        }
2081
2082        // Each swap probe must carry the wrong Content-Type it's
2083        // testing for — that's the whole point.
2084        for c in captures.iter() {
2085            let Some(suffix) = c.label.strip_prefix("request-body:content-type-mismatch:") else {
2086                continue;
2087            };
2088            let want_ct = match suffix {
2089                "xml" => "application/xml",
2090                "yaml" => "application/yaml",
2091                "multipart" => "multipart/form-data",
2092                "urlencoded" => "application/x-www-form-urlencoded",
2093                _ => continue,
2094            };
2095            let got_ct = c
2096                .request_headers
2097                .iter()
2098                .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2099                .map(|(_, v)| v.as_str())
2100                .unwrap_or("");
2101            assert_eq!(got_ct, want_ct, "swap probe `{}` sent wrong CT", c.label);
2102        }
2103
2104        // The body-less operation must NOT produce content-type-swap
2105        // probes (no body → no content type to lie about).
2106        let body_less_swaps = captures
2107            .iter()
2108            .filter(|c| {
2109                c.label.starts_with("request-body:content-type-mismatch:")
2110                    && c.url.ends_with("/ping")
2111            })
2112            .count();
2113        assert_eq!(
2114            body_less_swaps, 0,
2115            "GET /ping has no request body; should not produce content-type-swap probes"
2116        );
2117    }
2118
2119    /// Round 27 (k variant b) — Srikanth's round-23 follow-up on (k):
2120    /// JSON envelope with embedded non-JSON field values. For each
2121    /// JSON-body operation, four extra probes fire that send valid
2122    /// JSON with an XML/YAML/multipart/urlencoded snippet stuffed
2123    /// into a string field. Content-Type stays `application/json`;
2124    /// expected is 2xx-3xx (the body parses); a 5xx flags a server
2125    /// that crashed on the embedded content.
2126    #[tokio::test]
2127    async fn embedded_content_probes_fire_with_honest_content_type() {
2128        let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2129        let cfg = SelfTestConfig {
2130            target_url: "http://127.0.0.1:1".into(),
2131            timeout: Duration::from_millis(50),
2132            capture: Some(sink.clone()),
2133            ..Default::default()
2134        };
2135        let ops = vec![op(
2136            "POST",
2137            "/users",
2138            Some("{\"name\":\"alice\",\"age\":30}"),
2139            vec![],
2140            vec![],
2141            vec![],
2142        )];
2143        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2144        let captures = sink.lock().unwrap();
2145        let embedded: Vec<&CaseCapture> = captures
2146            .iter()
2147            .filter(|c| c.label.starts_with("request-body:embedded-content:"))
2148            .collect();
2149        assert_eq!(
2150            embedded.len(),
2151            4,
2152            "expected 4 embedded-content probes, got: {:?}",
2153            embedded.iter().map(|c| &c.label).collect::<Vec<_>>()
2154        );
2155        // Every embedded probe must carry the honest application/json
2156        // Content-Type (NOT lie like the variant-a content-type-swap
2157        // probes do) and a request body that still parses as JSON.
2158        for c in &embedded {
2159            let ct = c
2160                .request_headers
2161                .iter()
2162                .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2163                .map(|(_, v)| v.as_str())
2164                .unwrap_or("");
2165            assert!(
2166                ct.contains("application/json"),
2167                "embedded probe `{}` should keep Content-Type honest, got {ct}",
2168                c.label
2169            );
2170            let body = c.request_body.as_deref().unwrap_or("");
2171            assert!(
2172                serde_json::from_str::<serde_json::Value>(body).is_ok(),
2173                "embedded probe `{}` body should still be valid JSON, got: {body}",
2174                c.label
2175            );
2176        }
2177    }
2178
2179    /// `embed_payload_in_first_string_field` walks objects depth-first
2180    /// and replaces only the FIRST string-valued leaf, leaving the
2181    /// surrounding structure intact.
2182    #[test]
2183    fn embed_payload_replaces_first_string_only() {
2184        let sample = r#"{"name":"alice","age":30,"tags":["admin","user"]}"#;
2185        let mutated = embed_payload_in_first_string_field(sample, "<x/>");
2186        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
2187        assert_eq!(v["name"], serde_json::json!("<x/>"));
2188        // age stays an integer (not stringified by the mutation).
2189        assert_eq!(v["age"], serde_json::json!(30));
2190        // tags array's strings stay untouched (we only replace the
2191        // first encountered string leaf, depth-first).
2192        assert_eq!(v["tags"][0], serde_json::json!("admin"));
2193        assert_eq!(v["tags"][1], serde_json::json!("user"));
2194    }
2195
2196    /// When the sample has NO string field, the helper falls back to
2197    /// `{"data": "<snippet>"}` so the probe still has something to
2198    /// POST. The fallback must produce valid JSON regardless of what
2199    /// characters the snippet contains.
2200    #[test]
2201    fn embed_payload_falls_back_when_no_string_field() {
2202        let no_strings = r#"{"a":1,"b":[2,3]}"#;
2203        let mutated = embed_payload_in_first_string_field(no_strings, "<x><y></y></x>");
2204        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
2205        assert_eq!(v["data"], serde_json::json!("<x><y></y></x>"));
2206    }
2207
2208    #[test]
2209    fn embed_payload_handles_invalid_json_sample() {
2210        let not_json = "garbage";
2211        let mutated = embed_payload_in_first_string_field(not_json, "a=1&b=2");
2212        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
2213        assert_eq!(v["data"], serde_json::json!("a=1&b=2"));
2214    }
2215
2216    /// Round 26 — Srikanth saw `at /: Type { kind: Single` in his
2217    /// 0.3.169 capture for the vCenter `infraprofile/configs` 202
2218    /// response (spec promised `type: string`, server returned a
2219    /// JSON object). The output was a broken-syntax debug string.
2220    /// This test reproduces his exact spec+body and asserts the
2221    /// message is readable.
2222    #[test]
2223    fn response_schema_error_message_is_readable() {
2224        let schema = serde_json::json!({"type": "string"});
2225        let body = r#"{"data":{},"id":"generated_id","status":"created"}"#;
2226        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
2227        // The message must NOT contain Rust debug syntax leftovers
2228        // ("Type { kind:", trailing "{" or "(" tokens). It SHOULD say
2229        // what type was expected.
2230        assert!(!err.contains("Type { kind"), "stale debug output: {err}");
2231        assert!(!err.contains("{ kind:"), "stale debug output: {err}");
2232        assert!(err.contains("string"), "should name expected type: {err}");
2233        // Round 29 — Srikanth on 0.3.172 was confused by `at /:`,
2234        // thinking it pointed to the URL path. The new format
2235        // explicitly says "response body root" for the root case
2236        // (and "response body at /<pointer>" for nested fields).
2237        assert!(
2238            err.contains("response body root"),
2239            "should label root explicitly so reader knows it's not the URL: {err}"
2240        );
2241        // Round 28 — Srikanth wanted the expected schema embedded
2242        // in the message so it reads as 'expected schema {"type":"string"}'.
2243        assert!(
2244            err.contains("expected schema") && err.contains("\"type\":\"string\""),
2245            "should include expected schema JSON: {err}"
2246        );
2247    }
2248
2249    /// Round 29 — for non-root paths the format reads
2250    /// "response body at /name: ...". Catches the case where the
2251    /// root rewording accidentally dropped the JSON-pointer for
2252    /// nested fields.
2253    #[test]
2254    fn response_schema_error_uses_response_body_prefix_for_nested_paths() {
2255        let schema = serde_json::json!({
2256            "type": "object",
2257            "required": ["name"],
2258            "properties": {"name": {"type": "string"}}
2259        });
2260        let body = r#"{"name": 123}"#;
2261        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
2262        assert!(
2263            err.contains("response body at /name"),
2264            "nested path should read 'response body at /name': {err}"
2265        );
2266        assert!(!err.contains("response body root"), "wrong label for nested: {err}");
2267        // Round 30 — the "expected schema" suffix should be the
2268        // sub-schema at /name, not the entire object schema. Reader
2269        // shouldn't have to scan a 300-char object to find the
2270        // constraint that failed.
2271        assert!(
2272            err.contains(r#"expected schema {"type":"string"}"#),
2273            "should show only the /name sub-schema, not the full object: {err}"
2274        );
2275    }
2276
2277    /// Round 30 — Srikanth asked how a deeper nested mismatch reads.
2278    /// Schema: `name.type` should be a string; body has it as a number.
2279    /// JSON pointer is `/name/type`.
2280    #[test]
2281    fn response_schema_error_uses_response_body_prefix_for_deep_nested_paths() {
2282        let schema = serde_json::json!({
2283            "type": "object",
2284            "properties": {
2285                "name": {
2286                    "type": "object",
2287                    "properties": {"type": {"type": "string"}}
2288                }
2289            }
2290        });
2291        let body = r#"{"name": {"type": 123}}"#;
2292        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
2293        assert!(
2294            err.contains("response body at /name/type"),
2295            "deep nested path should read 'response body at /name/type': {err}"
2296        );
2297        // Round 30 — for deep paths the sub-schema is the leaf
2298        // {"type":"string"}, not the wrapping object schemas.
2299        assert!(
2300            err.contains(r#"expected schema {"type":"string"}"#),
2301            "should show only the /name/type leaf sub-schema: {err}"
2302        );
2303    }
2304
2305    /// Round 30 — when the instance pointer can't be resolved through
2306    /// the schema's `properties` chain (e.g. additionalProperties hit),
2307    /// `sub_schema_at_pointer` returns None and the message falls back
2308    /// to the full schema. Verifies the fallback path is wired.
2309    #[test]
2310    fn sub_schema_at_pointer_falls_back_for_unresolvable_paths() {
2311        let schema = serde_json::json!({"type":"object","additionalProperties":true});
2312        // Walker can't resolve /unknown, so we get the full schema back.
2313        assert_eq!(
2314            sub_schema_at_pointer(&schema, "/unknown"),
2315            None,
2316            "unresolvable path should return None to trigger fallback"
2317        );
2318        // Root path returns the whole schema.
2319        assert_eq!(sub_schema_at_pointer(&schema, "/"), Some(schema.clone()));
2320        assert_eq!(sub_schema_at_pointer(&schema, ""), Some(schema));
2321    }
2322
2323    #[test]
2324    fn response_schema_error_required_field_is_readable() {
2325        let schema = serde_json::json!({
2326            "type": "object",
2327            "required": ["id"],
2328            "properties": {"id": {"type": "integer"}}
2329        });
2330        let body = r#"{"other": 1}"#;
2331        let err = validate_body_against_schema(body, &schema).expect("required-missing fires");
2332        assert!(err.contains("required field missing"), "{err}");
2333        assert!(err.contains("id"), "{err}");
2334    }
2335
2336    /// Round 31 — Srikanth's vCenter case on 0.3.174: the
2337    /// `Appliance.Recovery.Backup.SystemName.Archive.Info` schema has
2338    /// a multi-paragraph description and ~6 required fields, of which
2339    /// `comment` was missing in the response. Before this fix the
2340    /// printed schema was the WHOLE parent object schema (parent's
2341    /// description bleeding in, all sibling property schemas dumped)
2342    /// truncated to 300 chars; after the fix it's the missing field's
2343    /// own schema. Verifies (a) parent description is gone and
2344    /// (b) sibling property names don't appear in the message.
2345    #[test]
2346    fn response_schema_error_required_focuses_on_missing_field_only() {
2347        let schema = serde_json::json!({
2348            "description": "The Appliance.Recovery.Backup.SystemName.Archive.Info schema represents backup archive information.\n\nThis schema was added in vSphere API 6.7.",
2349            "type": "object",
2350            "required": ["comment", "location", "parts", "system_name", "timestamp", "version"],
2351            "properties": {
2352                "comment": {
2353                    "type": "string",
2354                    "description": "Custom comment added by the user for this backup."
2355                },
2356                "location": {"type": "string", "description": "Backup location URL."},
2357                "parts": {"type": "array", "items": {"type": "string"}},
2358                "system_name": {"type": "string"},
2359                "timestamp": {"type": "string", "format": "date-time"},
2360                "version": {"type": "string"}
2361            }
2362        });
2363        let body = r#"{"location":"x","parts":[],"system_name":"y","timestamp":"z","version":"v"}"#;
2364        let err = validate_body_against_schema(body, &schema).expect("required-missing fires");
2365        assert!(err.contains("required field missing: \"comment\""), "{err}");
2366        // Parent's description should not appear; only the `comment`
2367        // field's own description (if any) may.
2368        assert!(
2369            !err.contains("Appliance.Recovery.Backup"),
2370            "parent description should not bleed into focused schema: {err}"
2371        );
2372        // No sibling property names should appear in the focused schema
2373        // suffix.
2374        for sibling in ["location", "parts", "system_name", "timestamp", "version"] {
2375            assert!(
2376                !err.contains(&format!("\"{sibling}\"")),
2377                "sibling field {sibling} should not appear in focused schema: {err}"
2378            );
2379        }
2380    }
2381
2382    #[test]
2383    fn response_schema_error_none_on_match() {
2384        let schema = serde_json::json!({"type": "string"});
2385        assert_eq!(validate_body_against_schema("\"hello\"", &schema), None);
2386    }
2387
2388    #[test]
2389    fn json_serialises_report() {
2390        let r = SelfTestReport {
2391            positive_pass: 1,
2392            positive_fail: 0,
2393            negative_caught: BTreeMap::new(),
2394            negative_missed: BTreeMap::new(),
2395            operations: vec![OperationResult {
2396                method: "GET".into(),
2397                path: "/x".into(),
2398                positive: Some(CaseOutcome {
2399                    label: "positive".into(),
2400                    expected_4xx: false,
2401                    actual_status: 200,
2402                    passed: true,
2403                }),
2404                negatives: Vec::new(),
2405            }],
2406        };
2407        let json = serde_json::to_value(&r).expect("serialises");
2408        assert_eq!(json["positive_pass"], serde_json::json!(1));
2409        assert_eq!(json["operations"][0]["positive"]["actual_status"], serde_json::json!(200));
2410    }
2411}