mockforge_bench/conformance/self_test.rs
1//! Positive + per-category negative request driver against a live server.
2//!
3//! Issue #79 round 13 (4) — Srikanth's (e) ask: a way to test both
4//! positive and negative compliance scenarios separately, where the
5//! positive cases should pass and the negative cases should be
6//! rejected.
7//!
8//! This module sits *alongside* the existing conformance executor
9//! (which drives k6 / native checks on a single positive call per
10//! operation). The self-test driver synthesises per-category
11//! deliberately-bad requests and asserts that the server actually
12//! rejects them with a 4xx — useful when verifying that
13//! `validate_request_with_all` is wired correctly for the user's spec
14//! (the exact gap that round-13 (3) fixed).
15//!
16//! Scope of the initial MVP: covers the highest-signal negatives —
17//! empty body when one is required, missing required query/header
18//! params, and wrong-type path params. Doesn't try to mutate every
19//! field of a JSON-Schema-validated body; that's a follow-up.
20
21use super::spec_driven::{AnnotatedOperation, ApiKeyLocation, SecuritySchemeInfo};
22use reqwest::{Client, Method};
23use std::collections::BTreeMap;
24use std::net::IpAddr;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
/// Upper bound, in bytes, on how much of a single request *or* response
/// body is kept when a probe is recorded to
/// `conformance-self-test-requests.jsonl` (round 23 (c-iii)).
///
/// The cap applies per direction. At 16 KiB a 1000-case run stays
/// under roughly 32 MB even if every payload fills the cap, while a
/// typical JSON body (or a stack-trace error response) still survives
/// well enough to debug from.
const CAPTURE_BODY_CAP_BYTES: usize = 16 * 1024;
35
/// Maximum number of schema-driven negative probes sent per operation
/// (round 17.2).
///
/// A body with 100 properties can yield hundreds of mutations for one
/// operation; multiplied by thousands of operations that becomes a
/// runaway test matrix. Twelve is enough for the highest-signal
/// mutations (type mismatch, required-field removal, a few constraint
/// breaks) without blowing up wall time on large specs.
const SCHEMA_MUTATION_CAP: usize = 12;
43
/// Content-type swap probes (round 25 (k)).
///
/// Each entry is `(content_type, probe_label)`. For an operation that
/// declares a JSON request body, one probe is produced per entry: the
/// JSON payload is kept but the `Content-Type` header is replaced with
/// the listed value. A spec-compliant server should answer 415 (or
/// 400).
///
/// The order follows the list in Srikanth's round-23 reply — XML,
/// YAML, multipart — followed by the URL-encoded variant he added in
/// round 24.
const CONTENT_TYPE_SWAP_VARIANTS: &[(&str, &str)] = &[
    ("application/xml", "request-body:content-type-mismatch:xml"),
    ("application/yaml", "request-body:content-type-mismatch:yaml"),
    ("multipart/form-data", "request-body:content-type-mismatch:multipart"),
    (
        "application/x-www-form-urlencoded",
        "request-body:content-type-mismatch:urlencoded",
    ),
];
59
/// Embedded-content payloads (round 27, k variant b).
///
/// Each entry is `(probe_label, snippet)`. The request keeps
/// `Content-Type: application/json` and the envelope is valid JSON;
/// only the value of a string field is replaced with the non-JSON
/// snippet. The probe is meant to expose servers that try to parse
/// string field contents (XML expanders, YAML loaders, urlencoded
/// parsers, ...) and crash on them — a 5xx is the finding.
const EMBEDDED_CONTENT_VARIANTS: &[(&str, &str)] = &[
    (
        "request-body:embedded-content:xml",
        "<root><cmd>execute()</cmd></root>",
    ),
    (
        "request-body:embedded-content:yaml",
        "key: value\n- item1\n- item2",
    ),
    (
        "request-body:embedded-content:multipart",
        "--boundary\r\nContent-Disposition: form-data; name=\"x\"\r\n\r\nval\r\n--boundary--",
    ),
    (
        "request-body:embedded-content:urlencoded",
        "a=1&b=2&c=hello%20world",
    ),
];
75
76/// Configuration for a self-test run.
77#[derive(Debug, Clone)]
78pub struct SelfTestConfig {
79 pub target_url: String,
80 pub skip_tls_verify: bool,
81 pub timeout: Duration,
82 /// Optional extra headers to attach to every request (e.g. auth).
83 pub extra_headers: Vec<(String, String)>,
84 /// Delay between requests to avoid hammering the server.
85 pub delay_between_requests: Duration,
86 /// Round 18.1 — base path to prepend to every spec path. When the
87 /// spec declares `/users` and the deployed API is served under
88 /// `/api`, `--base-path /api` should make the self-test hit
89 /// `https://target/api/users` instead of `https://target/users`.
90 /// Pre-fix this was ignored entirely and every operation 404'd
91 /// (Srikanth's vCenter run on 0.3.152: 1275 positives, 1275 4xx).
92 pub base_path: Option<String>,
93 /// Round 18.5 — local source IPs to bind outgoing requests to.
94 /// Each IP must already be assigned to an interface on the host.
95 /// Operations round-robin through the resulting client pool.
96 pub source_ips: Vec<IpAddr>,
97 /// Round 18.5 — fake source IPs to advertise via forwarded-IP
98 /// headers (used to exercise GEODB lookup at the destination).
99 /// Rotated per operation.
100 pub geo_source_ips: Vec<IpAddr>,
101 /// Which forwarded-IP header(s) to populate when `geo_source_ips`
102 /// is non-empty. Empty → no-op; default below sets the standard
103 /// three-header set.
104 pub geo_source_headers: Vec<String>,
105 /// Round 23 (c-iii) — when `Some`, every probe captures method, URL,
106 /// request headers/body and response status/headers/body into this
107 /// sink. Caller drains it after `run_self_test` and writes
108 /// `conformance-self-test-requests.jsonl`. None → no capture (zero
109 /// extra allocations on the hot path).
110 pub capture: Option<Arc<Mutex<Vec<CaseCapture>>>>,
111 /// Round 25 — when true, validate every probe's response body
112 /// against the spec's response schema for the actual status
113 /// returned (closes round 21.3 / Srikanth's a2 / a3 ask). The
114 /// validation result lands in `CaseCapture::response_schema_error`
115 /// (None → matched, or no schema for that status). Default false:
116 /// JSON-Schema validation of large response bodies adds wall-clock
117 /// time and the user has to opt in.
118 pub validate_response_schemas: bool,
119 /// Round 33 (#823) — human-readable label for the OpenAPI spec
120 /// this run is exercising. Stamped on every `CaseCapture` so the
121 /// per-endpoint summary can attribute rows back to a spec in
122 /// multi-spec / multi-target benches. `None` when the bench didn't
123 /// track a spec path.
124 pub spec_label: Option<String>,
125}
126
127/// Round 23 (c-iii) — one captured request/response pair, one per
128/// probe (positive or negative). Serialised as a JSON line in
129/// `conformance-self-test-requests.jsonl`. Headers are kept as
130/// `BTreeMap` for stable ordering. Bodies are truncated to
131/// `CAPTURE_BODY_CAP_BYTES`; `*_truncated` flags whether more was
132/// dropped.
133#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
134pub struct CaseCapture {
135 pub label: String,
136 pub method: String,
137 pub url: String,
138 pub request_headers: BTreeMap<String, String>,
139 pub request_body: Option<String>,
140 pub request_body_truncated: bool,
141 pub response_status: u16,
142 pub response_headers: BTreeMap<String, String>,
143 pub response_body: Option<String>,
144 pub response_body_truncated: bool,
145 pub error: Option<String>,
146 /// Round 25 — when `validate_response_schemas` is on and the spec
147 /// declares a schema for `response_status`, this carries the
148 /// validation message (or None when the body matched, or no schema
149 /// was declared for that status). Serialised verbatim in the JSONL
150 /// and rendered in the HTML viewer.
151 #[serde(default, skip_serializing_if = "Option::is_none")]
152 pub response_schema_error: Option<String>,
153 /// Round 28 — Srikanth's "Is it possible to put expected response
154 /// code status in both jsonl and jsonl report" ask. Human-readable
155 /// expected status range: `"2xx-3xx"` for positive probes,
156 /// `"4xx"` for negatives. Lets users `jq` for misses
157 /// (`.response_status as $s | .expected_status_range == "4xx"
158 /// and ($s < 400 or $s >= 500)`) and powers the HTML viewer's
159 /// "show mismatches only" filter.
160 #[serde(default)]
161 pub expected_status_range: String,
162 /// Round 33 (#823) — the spec's path template (e.g.
163 /// `/users/{id}`) before path-param substitution. Lets the
164 /// per-endpoint summary collapse `/users/X` and `/users/Y` into
165 /// one row. Empty string when the call site predates this field
166 /// (older `CaseCapture` payloads on disk also deserialise OK).
167 #[serde(default)]
168 pub path_template: String,
169 /// Round 33 (#823) — basename (or fallback to full path) of the
170 /// OpenAPI spec file this probe came from. Lets multi-spec runs
171 /// attribute rows back to the spec they came from. `None` when
172 /// the bench didn't track a spec path.
173 #[serde(default, skip_serializing_if = "Option::is_none")]
174 pub spec_label: Option<String>,
175 /// Round 36 (#876) — mockforge version that ran the probe.
176 /// Stamped from `CARGO_PKG_VERSION` at compile time. Also sent
177 /// as the `X-Mockforge-Client-Version` request header so a
178 /// matching `ServerConformanceViolation.client_mockforge_version`
179 /// can be cross-correlated. Empty string when the capture
180 /// pre-dates this field.
181 #[serde(default)]
182 pub mockforge_version: String,
183 /// Round 36 (#876) — wall-clock moment the bench driver sent the
184 /// request, as RFC3339 / ISO-8601. Also sent as the
185 /// `X-Mockforge-Client-Sent-At` request header so the server-side
186 /// `ServerConformanceViolation.client_sent_at` carries the same
187 /// value. Empty string when the capture pre-dates this field.
188 #[serde(default)]
189 pub client_sent_at: String,
190}
191
192impl Default for SelfTestConfig {
193 fn default() -> Self {
194 Self {
195 target_url: "http://localhost:3000".into(),
196 skip_tls_verify: false,
197 timeout: Duration::from_secs(15),
198 extra_headers: Vec::new(),
199 delay_between_requests: Duration::from_millis(0),
200 base_path: None,
201 source_ips: Vec::new(),
202 geo_source_ips: Vec::new(),
203 geo_source_headers: default_geo_source_headers(),
204 capture: None,
205 validate_response_schemas: false,
206 spec_label: None,
207 }
208 }
209}
210
211/// Truncate `body` to `CAPTURE_BODY_CAP_BYTES` on a UTF-8 boundary,
212/// returning the trimmed string and whether truncation occurred. Used
213/// for both request and response bodies in the capture sink.
214fn truncate_body_for_capture(body: &str) -> (String, bool) {
215 if body.len() <= CAPTURE_BODY_CAP_BYTES {
216 return (body.to_string(), false);
217 }
218 let mut end = CAPTURE_BODY_CAP_BYTES;
219 while end > 0 && !body.is_char_boundary(end) {
220 end -= 1;
221 }
222 (body[..end].to_string(), true)
223}
224
/// The forwarded-IP header names used when the caller does not
/// override them.
///
/// The set covers the three conventions a real GEODB front-end is
/// likely to read: Cloudflare's `CF-Connecting-IP`, the
/// Akamai/CloudFront `True-Client-IP`, and the de-facto standard
/// `X-Forwarded-For`. The returned vector lists them as
/// `X-Forwarded-For`, `True-Client-IP`, `CF-Connecting-IP`. Use
/// `--geo-source-header` to target one specific stack instead.
pub fn default_geo_source_headers() -> Vec<String> {
    ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"]
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
238
239/// Outcome of a single test case (positive or negative).
240#[derive(Debug, Clone, serde::Serialize)]
241pub struct CaseOutcome {
242 pub label: String,
243 pub expected_4xx: bool,
244 pub actual_status: u16,
245 /// True when the response status matches expectation
246 /// (positive → 2xx-3xx, negative → 4xx).
247 pub passed: bool,
248}
249
250/// All cases run against one annotated operation.
251#[derive(Debug, Clone, serde::Serialize)]
252pub struct OperationResult {
253 pub method: String,
254 pub path: String,
255 pub positive: Option<CaseOutcome>,
256 pub negatives: Vec<CaseOutcome>,
257}
258
259/// Summary report rolled up across all operations.
260#[derive(Debug, Default, Clone, serde::Serialize)]
261pub struct SelfTestReport {
262 pub positive_pass: usize,
263 pub positive_fail: usize,
264 /// Per category: count of negative cases the server correctly
265 /// rejected with a 4xx (we caught the spec violation).
266 pub negative_caught: BTreeMap<String, usize>,
267 /// Per category: count of negative cases that should have been
268 /// rejected but came back with a non-4xx (validator gap).
269 pub negative_missed: BTreeMap<String, usize>,
270 pub operations: Vec<OperationResult>,
271}
272
273impl SelfTestReport {
274 /// All-pass means every positive case got 2xx-3xx and every
275 /// negative case got 4xx.
276 pub fn all_passed(&self) -> bool {
277 self.positive_fail == 0 && self.negative_missed.values().sum::<usize>() == 0
278 }
279
280 /// Round 18.1 — detect the "self-test target is misconfigured"
281 /// case where every positive failed with the *same* status code.
282 /// The classic example: `--base-path /api` was forgotten so every
283 /// request hits a path the server doesn't know and returns 404.
284 /// Pre-warning, the user saw all-green negative buckets (because
285 /// "missing route" 404s look like "validator rejected") and no
286 /// indication that the run was meaningless. Returns Some(status)
287 /// when ≥10 positives all failed with the same status, else None.
288 pub fn detect_target_misconfiguration(&self) -> Option<u16> {
289 if self.positive_pass > 0 || self.positive_fail < 10 {
290 return None;
291 }
292 let mut seen: Option<u16> = None;
293 for op in &self.operations {
294 let Some(p) = &op.positive else {
295 continue;
296 };
297 if p.passed {
298 return None;
299 }
300 match seen {
301 None => seen = Some(p.actual_status),
302 Some(s) if s != p.actual_status => return None,
303 _ => {}
304 }
305 }
306 seen
307 }
308
309 /// Human-readable summary string. One line for positives, one per
310 /// category for negatives. Designed to slot into existing
311 /// `TerminalReporter` output.
312 pub fn render_summary(&self) -> String {
313 let mut out = String::new();
314 out.push_str(&format!(
315 "Positives: {} pass / {} fail\n",
316 self.positive_pass, self.positive_fail
317 ));
318 let mut keys: Vec<&String> =
319 self.negative_caught.keys().chain(self.negative_missed.keys()).collect();
320 keys.sort();
321 keys.dedup();
322 for cat in keys {
323 let caught = self.negative_caught.get(cat).copied().unwrap_or(0);
324 let missed = self.negative_missed.get(cat).copied().unwrap_or(0);
325 let mark = if missed == 0 { "✓" } else { "⚠" };
326 out.push_str(&format!(
327 "Negatives [{}]: {} caught / {} missed {}\n",
328 cat, caught, missed, mark
329 ));
330 }
331 out
332 }
333}
334
335/// Execute the self-test plan against `config.target_url` for every
336/// `AnnotatedOperation`. Returns the aggregated report; callers
337/// decide how to display it (e.g. via `render_summary` or by writing
338/// the JSON serialisation to disk).
339pub async fn run_self_test(
340 operations: &[AnnotatedOperation],
341 config: &SelfTestConfig,
342) -> Result<SelfTestReport, reqwest::Error> {
343 // Round 18.5 — build a client pool when `source_ips` is set,
344 // one reqwest::Client per IP, each bound to its local address.
345 // Operations round-robin through the pool. Empty pool → single
346 // default client (the pre-18.5 behaviour).
347 let clients = build_client_pool(config)?;
348 let client_cursor = AtomicUsize::new(0);
349 let geo_cursor = AtomicUsize::new(0);
350
351 let mut report = SelfTestReport::default();
352 for op in operations {
353 let client_idx = client_cursor.fetch_add(1, Ordering::Relaxed) % clients.len();
354 let client = &clients[client_idx];
355 let geo_ip = if config.geo_source_ips.is_empty() {
356 None
357 } else {
358 let idx = geo_cursor.fetch_add(1, Ordering::Relaxed) % config.geo_source_ips.len();
359 Some(config.geo_source_ips[idx])
360 };
361 let result = test_operation(client, config, op, geo_ip).await;
362 if let Some(p) = &result.positive {
363 if p.passed {
364 report.positive_pass += 1;
365 } else {
366 report.positive_fail += 1;
367 }
368 }
369 for neg in &result.negatives {
370 let cat = neg.label.split(':').next().unwrap_or("other").to_string();
371 if neg.passed {
372 *report.negative_caught.entry(cat).or_insert(0) += 1;
373 } else {
374 *report.negative_missed.entry(cat).or_insert(0) += 1;
375 }
376 }
377 report.operations.push(result);
378 if !config.delay_between_requests.is_zero() {
379 tokio::time::sleep(config.delay_between_requests).await;
380 }
381 }
382 Ok(report)
383}
384
/// Round 18.5 — combine an operation's declared headers with the
/// GEODB forwarded-IP headers.
///
/// The result always starts as a copy of `base`. If `geo_ip` is
/// `Some`, each name in `geo_headers` is appended with the IP's
/// string form as its value — unless a header with that name
/// (compared ASCII case-insensitively) is already present. A header
/// the operation declares itself is therefore never overridden: the
/// caller's spec wins. With `geo_ip == None` or an empty
/// `geo_headers` the copy of `base` is returned unchanged.
fn effective_op_headers(
    base: &[(String, String)],
    geo_ip: Option<IpAddr>,
    geo_headers: &[String],
) -> Vec<(String, String)> {
    let mut headers = base.to_vec();
    if let Some(ip) = geo_ip {
        let advertised = ip.to_string();
        for name in geo_headers {
            let already_declared = headers
                .iter()
                .any(|(existing, _)| existing.eq_ignore_ascii_case(name));
            if !already_declared {
                headers.push((name.clone(), advertised.clone()));
            }
        }
    }
    headers
}
412
413/// Round 18.5 — build a pool of reqwest clients, one per declared
414/// source IP. Empty `source_ips` → a single default client.
415///
416/// The OS must already have each `source_ip` assigned to an
417/// interface; reqwest's `.local_address()` issues a `bind()` syscall
418/// at connect time, so an IP the kernel doesn't recognise surfaces
419/// as `EADDRNOTAVAIL` at request time, not at builder time.
420fn build_client_pool(config: &SelfTestConfig) -> Result<Vec<Client>, reqwest::Error> {
421 let make = |bind: Option<IpAddr>| -> Result<Client, reqwest::Error> {
422 let mut builder = Client::builder().timeout(config.timeout);
423 if config.skip_tls_verify {
424 builder = builder.danger_accept_invalid_certs(true);
425 }
426 if let Some(addr) = bind {
427 builder = builder.local_address(addr);
428 }
429 builder.build()
430 };
431 if config.source_ips.is_empty() {
432 Ok(vec![make(None)?])
433 } else {
434 config.source_ips.iter().map(|ip| make(Some(*ip))).collect()
435 }
436}
437
438async fn test_operation(
439 client: &Client,
440 config: &SelfTestConfig,
441 op: &AnnotatedOperation,
442 geo_ip: Option<IpAddr>,
443) -> OperationResult {
444 // Round 25 — track the sink length BEFORE we run any probes for
445 // this operation, so that after the probes finish we can mutate
446 // exactly the entries that belong to this op (the capture sink is
447 // shared but `run_self_test` iterates operations sequentially).
448 // Used by the response-schema validation pass below.
449 let sink_start = config.capture.as_ref().and_then(|s| s.lock().ok().map(|g| g.len()));
450
451 let url = build_url_with_base(
452 &config.target_url,
453 config.base_path.as_deref(),
454 &op.path,
455 &op.path_params,
456 );
457 let method = Method::from_bytes(op.method.to_uppercase().as_bytes()).unwrap_or(Method::GET);
458
459 // Round 34 (#828) — stamp every `CaseCapture` with the spec
460 // template PREFIXED by `--base-path`, so the per-endpoint
461 // summary's `path` column matches what the user sees in URLs
462 // and logs. Srikanth searched for `/api/appliance/access/...`
463 // and didn't find it because round 33 stored just `/appliance/
464 // access/...`. Same normalization as `build_url_with_base`:
465 // leading `/` auto-added, trailing `/` stripped, empty
466 // base_path → no prefix at all.
467 let path_template = {
468 let prefix = match config.base_path.as_deref() {
469 Some(bp) if !bp.is_empty() => {
470 let trimmed = bp.trim_end_matches('/');
471 if trimmed.starts_with('/') {
472 trimmed.to_string()
473 } else {
474 format!("/{}", trimmed)
475 }
476 }
477 _ => String::new(),
478 };
479 let path = if op.path.starts_with('/') {
480 op.path.clone()
481 } else {
482 format!("/{}", op.path)
483 };
484 format!("{prefix}{path}")
485 };
486
487 // Round 18.5 — pre-compute the operation's effective headers
488 // with the geo source IP baked in. Doing it once here keeps the
489 // per-case `send_case` calls below unchanged. When `geo_ip` is
490 // None the result equals `op.header_params`.
491 let op_headers = effective_op_headers(&op.header_params, geo_ip, &config.geo_source_headers);
492
493 // ── Positive case ────────────────────────────────────────────
494 let positive = send_case(
495 client,
496 config,
497 method.clone(),
498 &url,
499 "positive",
500 ExpectedOutcome::Success,
501 op.sample_body.as_deref(),
502 op.query_params.clone(),
503 op_headers.clone(),
504 &path_template,
505 )
506 .await;
507
508 // ── Negative cases ───────────────────────────────────────────
509 let mut negatives = Vec::new();
510
511 // (a) empty body when one is required.
512 //
513 // Round 16 — drop the `sample_body.is_some()` precondition. Operations
514 // whose body annotator couldn't synthesize a sample previously got
515 // zero negatives (so the self-test reported "all passing" even on
516 // POST /resource with a required body). The spec saying the operation
517 // *has* a request body is enough — an empty object is a valid
518 // negative regardless of whether we have a positive sample.
519 if op.request_body_content_type.is_some() {
520 negatives.push(
521 send_case(
522 client,
523 config,
524 method.clone(),
525 &url,
526 "request-body:empty",
527 ExpectedOutcome::ClientError,
528 Some("{}"),
529 op.query_params.clone(),
530 op_headers.clone(),
531 &path_template,
532 )
533 .await,
534 );
535
536 // (b) wrong-shaped body (array instead of object) — exercises
537 // top-level type validation independently of which fields are
538 // required.
539 negatives.push(
540 send_case(
541 client,
542 config,
543 method.clone(),
544 &url,
545 "request-body:wrong-type",
546 ExpectedOutcome::ClientError,
547 Some("[]"),
548 op.query_params.clone(),
549 op_headers.clone(),
550 &path_template,
551 )
552 .await,
553 );
554
555 // Round 25 (k) — content-type swap probes.
556 //
557 // For operations declaring `application/json` request bodies, send
558 // the SAME json payload (or a synthesised one) under four other
559 // content types: `application/xml`, `application/yaml`,
560 // `multipart/form-data`, `application/x-www-form-urlencoded`.
561 // The spec says the endpoint accepts only JSON, so a strict server
562 // should respond 415 Unsupported Media Type (or 400 if it tries
563 // to parse and fails). A 2xx means the server is accepting
564 // payloads outside its declared content negotiation, which is the
565 // failure mode behind a lot of "we crashed on a malformed XML
566 // upload" incidents.
567 //
568 // Variant (a) of Srikanth's round-23 g ask: lie about the
569 // Content-Type header. The body shape is honest JSON; only the
570 // header is swapped. Variant (b) (JSON envelope with embedded
571 // non-JSON field values) is deferred to round 26 because it
572 // requires a schema-aware field walker.
573 if op
574 .request_body_content_type
575 .as_deref()
576 .map(|ct| ct.contains("json"))
577 .unwrap_or(false)
578 {
579 let payload = op.sample_body.as_deref().unwrap_or("{}");
580 for (ct, label) in CONTENT_TYPE_SWAP_VARIANTS {
581 negatives.push(
582 send_case_with_extra(
583 client,
584 config,
585 method.clone(),
586 &url,
587 label,
588 ExpectedOutcome::ClientError,
589 Some(payload),
590 op.query_params.clone(),
591 // Strip any Content-Type already on the operation
592 // headers (the spec's positive value) so the
593 // probe's value is the only one the server sees.
594 op_headers
595 .iter()
596 .filter(|(k, _)| !k.eq_ignore_ascii_case("content-type"))
597 .cloned()
598 .collect(),
599 // The wrong Content-Type rides on `extra_headers`
600 // so it lands AFTER `send_case_with_extra`'s
601 // unconditional `application/json` insertion in
602 // request-body mode. Actually `send_case_with_extra`
603 // only sets Content-Type when a body is present
604 // AND there's no manual override; passing the
605 // override here wins because reqwest preserves
606 // the last-set header value.
607 vec![("Content-Type".to_string(), (*ct).to_string())],
608 &path_template,
609 )
610 .await,
611 );
612 }
613
614 // Round 27 (k variant b) — embedded non-JSON content
615 // inside a valid JSON envelope. Content-Type stays
616 // application/json (honest) and the body parses as JSON;
617 // only the string-valued payload changes. We expect 2xx-3xx
618 // because the envelope is spec-shape, so the probe surfaces
619 // servers that crash (5xx) trying to parse the embedded
620 // snippet as XML/YAML/etc. A 4xx is also a finding because
621 // it usually means the server's pattern/format validator
622 // tripped on the payload contents, but the user can decide
623 // from the JSONL whether that's a bug or correct narrow-
624 // string-field behaviour.
625 for (label, snippet) in EMBEDDED_CONTENT_VARIANTS {
626 let payload = op.sample_body.as_deref().unwrap_or("{}");
627 // Round 34 (#829) — skip the probe entirely when the
628 // positive sample has no string leaf we can mutate.
629 // The previous round-27 fallback `{"data": <snippet>}`
630 // produced a body that doesn't match the spec's actual
631 // schema for endpoints like vCenter's `consolecli` PUT
632 // (which wants `{enabled: bool}`), so the server
633 // correctly 400'd and the bench misreported the
634 // mismatch as an expectation failure.
635 let Some(body) = embed_payload_in_first_string_field(payload, snippet) else {
636 continue;
637 };
638 negatives.push(
639 send_case(
640 client,
641 config,
642 method.clone(),
643 &url,
644 label,
645 // expected_4xx=false: any non-2xx is a probe
646 // failure. 5xx in particular is "server panicked
647 // on the embedded content".
648 ExpectedOutcome::NotServerError,
649 Some(&body),
650 op.query_params.clone(),
651 op_headers.clone(),
652 &path_template,
653 )
654 .await,
655 );
656 }
657 }
658
659 // Round 17.2 — schema-aware negatives.
660 //
661 // When both a positive sample AND the resolved body schema are
662 // available, mutate the sample per-field (type mismatch,
663 // min/max bounds, pattern, enum out-of-range, required-field
664 // removal) and assert each is rejected with 4xx. Capped at
665 // SCHEMA_MUTATION_CAP per operation so a 100-property body
666 // doesn't explode the test matrix.
667 if let (Some(sample_str), Some(schema)) =
668 (op.sample_body.as_deref(), op.request_body_schema.as_ref())
669 {
670 if let Ok(sample) = serde_json::from_str::<serde_json::Value>(sample_str) {
671 let mutations = super::schema_mutator::mutate_body(&sample, schema);
672 for m in mutations.into_iter().take(SCHEMA_MUTATION_CAP) {
673 let body_str = serde_json::to_string(&m.body).unwrap_or_default();
674 negatives.push(
675 send_case(
676 client,
677 config,
678 method.clone(),
679 &url,
680 &m.label,
681 ExpectedOutcome::ClientError,
682 Some(&body_str),
683 op.query_params.clone(),
684 // Round 24 (f) — was `op.header_params`, which
685 // skipped the geo-IP header. Use `op_headers`
686 // so the geo IP rides with the negative probe
687 // too (positive vs negative coverage must be
688 // symmetric, otherwise a GEODB front-end sees
689 // the rotating IP only on positives).
690 op_headers.clone(),
691 &path_template,
692 )
693 .await,
694 );
695 }
696 }
697 }
698 }
699
700 // Round 17.2 — URI-length probe. Spec-agnostic but schema-aware in
701 // spirit: most servers cap URIs at 8 KB or so. Append a 9 KB query
702 // string to the URL and expect 414 URI Too Long (or 400). Skipped
703 // for operations that already have a heavy positive query.
704 {
705 let pad = "p=".to_string() + &"x".repeat(9_000);
706 let bad_url = if url.contains('?') {
707 format!("{url}&{pad}")
708 } else {
709 format!("{url}?{pad}")
710 };
711 negatives.push(
712 send_case(
713 client,
714 config,
715 method.clone(),
716 &bad_url,
717 "parameters:uri-too-long",
718 ExpectedOutcome::ClientError,
719 op.sample_body.as_deref(),
720 op.query_params.clone(),
721 // Round 24 (f) — see schema-mutation note above. Use
722 // `op_headers` (carries geo IP) instead of bare
723 // `op.header_params`.
724 op_headers.clone(),
725 &path_template,
726 )
727 .await,
728 );
729 }
730
731 // (e) Round 16 — path-param type probe. Send the first path
732 // parameter as a literal `"self-test-invalid-id"`: a string that
733 // contains hyphens, won't parse as an integer, won't parse as a
734 // UUID, and won't match any typical regex pattern. Operations
735 // whose spec types the param as `integer` or `string` with a
736 // `format`/`pattern` will catch this (caught: server returned
737 // 4xx); operations whose spec lets path params be free-form
738 // strings will let it through (missed: server returned 2xx).
739 // Either outcome is informative: a category that's all "missed"
740 // tells the user their spec is loose on path-param types, which
741 // is itself worth knowing. Addresses Srikanth's "always all
742 // passing" report — operations with a path param now produce at
743 // least one probe instead of zero.
744 if !op.path_params.is_empty() {
745 let mut url_with_placeholder = op.path.clone();
746 if let Some((first_name, _)) = op.path_params.first() {
747 // Substitute every other path-param with its sample so the
748 // route shape stays intact and only the first param is bad.
749 for (name, value) in op.path_params.iter().skip(1) {
750 if !value.is_empty() {
751 url_with_placeholder =
752 url_with_placeholder.replace(&format!("{{{name}}}"), value);
753 }
754 }
755 // Substitute the first param with a guaranteed-invalid
756 // sentinel that's unlikely to match any reasonable schema:
757 // contains characters disallowed in numeric IDs *and* UUIDs.
758 url_with_placeholder =
759 url_with_placeholder.replace(&format!("{{{first_name}}}"), "self-test-invalid-id");
760 // Round 18.1 — honour `base_path` here too, otherwise the
761 // probe URL differs from the positive case and the
762 // resulting 404 is misattributed to "bad path param".
763 let bad_url = build_url_with_base(
764 &config.target_url,
765 config.base_path.as_deref(),
766 &url_with_placeholder,
767 &[],
768 );
769 negatives.push(
770 send_case(
771 client,
772 config,
773 method.clone(),
774 &bad_url,
775 "parameters:bad-path-param",
776 ExpectedOutcome::ClientError,
777 op.sample_body.as_deref(),
778 op.query_params.clone(),
779 op_headers.clone(),
780 &path_template,
781 )
782 .await,
783 );
784 }
785 }
786
787 // (c) drop the first required query param
788 if !op.query_params.is_empty() {
789 let mut q = op.query_params.clone();
790 q.remove(0);
791 negatives.push(
792 send_case(
793 client,
794 config,
795 method.clone(),
796 &url,
797 "parameters:missing-query",
798 ExpectedOutcome::ClientError,
799 op.sample_body.as_deref(),
800 q,
801 op_headers.clone(),
802 &path_template,
803 )
804 .await,
805 );
806 }
807
808 // (s) Round 17.3 — security probes.
809 //
810 // Operations whose spec declares a security requirement get a
811 // dedicated set of negatives. The point isn't to test whether the
812 // server's *real* auth works (the positive case already does that
813 // via `extra_headers`) — it's to check whether deliberately-bad
814 // credentials are still rejected, which is exactly the failure
815 // mode that lets an attacker through a half-wired validator.
816 //
817 // Each probe replaces or omits the relevant auth credential and
818 // expects 401 / 403. A 2xx here is a hard finding: "spec says
819 // this endpoint is protected, server let unauthenticated /
820 // wrong-credential traffic through".
821 //
822 // Bounded: at most one probe per declared scheme kind, so an
823 // operation with 3 security requirements doesn't 4× the request
824 // volume. Skips entirely when `op.security_schemes` is empty.
825 for probe in build_security_probes(&op.security_schemes) {
826 // Strip any pre-existing Authorization or known API-key
827 // header from extra_headers + header_params so the probe
828 // value is the *only* credential the server sees.
829 let stripped_extra = strip_auth(&config.extra_headers, &op.security_schemes);
830 let stripped_headers = strip_auth(&op.header_params, &op.security_schemes);
831 let stripped_query = strip_auth_query(&op.query_params, &op.security_schemes);
832 let mut req_headers = stripped_headers;
833 for (k, v) in &probe.headers {
834 req_headers.push((k.clone(), v.clone()));
835 }
836 // Round 24 (f) — security probes build req_headers from
837 // `op.header_params` directly (we need the stripped-auth
838 // variant), so the geo-IP header doesn't ride along
839 // automatically. Append it here so a GEODB / WAF in front
840 // of the auth layer still sees the rotating source IP.
841 if let Some(ip) = geo_ip {
842 let ip_str = ip.to_string();
843 for h in &config.geo_source_headers {
844 let already = req_headers.iter().any(|(k, _)| k.eq_ignore_ascii_case(h));
845 if !already {
846 req_headers.push((h.clone(), ip_str.clone()));
847 }
848 }
849 }
850 let mut req_query = stripped_query;
851 for (k, v) in &probe.query {
852 req_query.push((k.clone(), v.clone()));
853 }
854 negatives.push(
855 send_case_with_extra(
856 client,
857 config,
858 method.clone(),
859 &url,
860 &probe.label,
861 ExpectedOutcome::ClientError,
862 op.sample_body.as_deref(),
863 req_query,
864 req_headers,
865 stripped_extra,
866 &path_template,
867 )
868 .await,
869 );
870 }
871
872 // (d) drop the first required header
873 if !op.header_params.is_empty() {
874 // Round 24 (f) — start from `op_headers` (so the geo IP rides
875 // along) and only strip the first OPERATION-declared header.
876 // Slicing past `op.header_params.len()` would otherwise risk
877 // dropping the geo header itself; `op_headers` is built as
878 // `op.header_params ++ geo` so index 0 is always operational.
879 let mut h = op_headers.clone();
880 if !h.is_empty() {
881 h.remove(0);
882 }
883 negatives.push(
884 send_case(
885 client,
886 config,
887 method.clone(),
888 &url,
889 "parameters:missing-header",
890 ExpectedOutcome::ClientError,
891 op.sample_body.as_deref(),
892 op.query_params.clone(),
893 h,
894 &path_template,
895 )
896 .await,
897 );
898 }
899
900 // (w) Round 17.5 — OWASP/WAF unification.
901 //
902 // Pull one canonical payload per OWASP category from the existing
903 // `SecurityPayloads` library and emit an injection probe per
904 // category. Targets in priority order: (1) substitute the first
905 // query param's value, (2) substitute the first string field of
906 // the positive JSON body, (3) skip if neither is available.
907 //
908 // Label format `owasp:<category>`, so the existing
909 // `negative_caught` / `negative_missed` rollup groups all OWASP
910 // findings under one `owasp` bucket. Expected 4xx (server should
911 // reject malicious input). A 5xx is a hard finding (server
912 // crashed on the payload); a 2xx is a soft finding (input passed
913 // through unfiltered — may or may not be a real vuln).
914 //
915 // Bounded: at most one probe per category (7 categories total).
916 // Skips the operation entirely if no injection target is
917 // available — open GET endpoints with no params get zero OWASP
918 // probes, no false signal.
919 for probe in build_owasp_probes(op) {
920 negatives.push(
921 send_case(
922 client,
923 config,
924 method.clone(),
925 &url,
926 &probe.label,
927 ExpectedOutcome::ClientError,
928 probe.body.as_deref(),
929 probe.query,
930 // Round 24 (f) — OWASP injection probes must also
931 // carry the geo IP, otherwise a WAF / GEODB rule
932 // tuned to a specific source IP would silently let
933 // them through.
934 op_headers.clone(),
935 &path_template,
936 )
937 .await,
938 );
939 }
940
941 // Round 25 — response-body shape validation pass. For each capture
942 // this op pushed onto the sink, look up the spec's schema for the
943 // actual response status and validate. Result lands in
944 // `response_schema_error` (Some(message) on failure, None on
945 // pass or no-schema-for-this-status). Runs only when the user
946 // opted in AND capture is on (we need the body).
947 if config.validate_response_schemas {
948 if let (Some(sink), Some(start)) = (config.capture.as_ref(), sink_start) {
949 if !op.response_schemas.is_empty() {
950 if let Ok(mut guard) = sink.lock() {
951 let end = guard.len();
952 for i in start..end {
953 let Some(entry) = guard.get_mut(i) else {
954 continue;
955 };
956 let Some(body) = entry.response_body.as_deref() else {
957 continue;
958 };
959 let Some(schema) = op.response_schemas.get(&entry.response_status) else {
960 continue;
961 };
962 entry.response_schema_error = validate_body_against_schema(body, schema);
963 }
964 }
965 }
966 }
967 }
968
969 OperationResult {
970 method: op.method.clone(),
971 path: op.path.clone(),
972 positive: Some(positive),
973 negatives,
974 }
975}
976
977/// Round 25 — validate a JSON body string against an OpenAPI response
978/// schema (already converted to a `serde_json::Value`). Returns
979/// `Some(message)` describing the first violation, or `None` on a
980/// clean pass / non-JSON body / schema-build failure (in which case
981/// the absence of an error means "we didn't have anything to compare
982/// against", not "passed"; the caller-side semantics treat absence as
983/// success because that's what the user sees as silence).
984/// Round 27 (k variant b) — return a JSON body string identical to
985/// `sample` except that the first string-valued leaf has been
986/// replaced with `snippet`. Walks objects depth-first and stops at
987/// the first string. Returns `None` when `sample` is not parseable
988/// JSON or has no string field anywhere; the caller skips emitting
989/// a probe in that case (Round 34 #829: Srikanth on 0.3.178 found
990/// that the previous `{"data": <snippet>}` fallback envelope didn't
991/// match real-API schemas like vCenter's `{enabled: bool}` and the
992/// server correctly 400'd, which the bench then misreported as a
993/// `2xx-3xx` expectation miss).
994fn embed_payload_in_first_string_field(sample: &str, snippet: &str) -> Option<String> {
995 let mut parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
996 if !replace_first_string(&mut parsed, snippet) {
997 return None;
998 }
999 serde_json::to_string(&parsed).ok()
1000}
1001
1002/// Helper for `embed_payload_in_first_string_field`: recursively
1003/// walk the value and replace the FIRST string leaf encountered.
1004/// Returns true when a replacement happened. Honors document order
1005/// for objects (BTreeMap-backed `serde_json::Map` iterates in
1006/// insertion order) so the choice of which field to mutate is
1007/// stable across runs.
1008fn replace_first_string(v: &mut serde_json::Value, snippet: &str) -> bool {
1009 match v {
1010 serde_json::Value::String(s) => {
1011 *s = snippet.to_string();
1012 true
1013 }
1014 serde_json::Value::Object(map) => {
1015 for (_k, child) in map.iter_mut() {
1016 if replace_first_string(child, snippet) {
1017 return true;
1018 }
1019 }
1020 false
1021 }
1022 serde_json::Value::Array(arr) => {
1023 for child in arr.iter_mut() {
1024 if replace_first_string(child, snippet) {
1025 return true;
1026 }
1027 }
1028 false
1029 }
1030 _ => false,
1031 }
1032}
1033
1034fn validate_body_against_schema(body: &str, schema: &serde_json::Value) -> Option<String> {
1035 let parsed: serde_json::Value = serde_json::from_str(body).ok()?;
1036 let validator = jsonschema::validator_for(schema).ok()?;
1037 let mut errors = validator.iter_errors(&parsed);
1038 let first = errors.next()?;
1039 // Round 28 — Srikanth on 0.3.170 wanted the message to show the
1040 // actual expected schema alongside the kind label so it reads as
1041 // "expected schema {...} but got <kind>". We emit a compact JSON
1042 // serialisation of the schema as a suffix; the kind label still
1043 // names what went wrong in plain English for quick scanning.
1044 // Round 26 — Srikanth on 0.3.169: the prior `format!("{:?}", first.kind)
1045 // .split('(').next()` produced "Type { kind: Single" (broken Rust
1046 // syntax, mismatched braces). Switch to the human-readable mapping
1047 // already used in executor.rs: handle the common kinds (Type,
1048 // Required, AdditionalProperties, Enum, MinLength, MaxLength,
1049 // Minimum, Maximum, Pattern) explicitly; fall back to the
1050 // jsonschema crate's Display impl on the error (which produces
1051 // something like "{...} is not of type \"string\"") for the long
1052 // tail. Combined with `at <instance-path>` for the field location.
1053 let path = first.instance_path.to_string();
1054 let path = if path.is_empty() { "/" } else { path.as_str() };
1055 // Round 31 — Srikanth on 0.3.174 hit the vCenter case where the
1056 // error is "required field missing: comment" but the printed
1057 // schema was the WHOLE parent object schema (with descriptions of
1058 // every property), not just the missing field's sub-schema. The
1059 // jsonschema crate emits `Required` errors with
1060 // `instance_path == /` (the parent), so the round-30 sub-schema
1061 // walker had no extra info to focus the suffix. Carry the missing
1062 // property name out of the kind match so we can descend one more
1063 // step into `properties[property]` for the printed schema.
1064 let mut required_property: Option<String> = None;
1065 let kind_msg: String = match &first.kind {
1066 jsonschema::error::ValidationErrorKind::Type { kind } => {
1067 // `kind` is `TypeKind::Single(JsonType)` or
1068 // `TypeKind::Multiple(JsonTypeSet)`. `JsonType` has its
1069 // own `Display` impl ("string", "object", etc.).
1070 match kind {
1071 jsonschema::error::TypeKind::Single(t) => format!("expected type {t}"),
1072 jsonschema::error::TypeKind::Multiple(_) => "expected one of multiple types".into(),
1073 }
1074 }
1075 jsonschema::error::ValidationErrorKind::Required { property } => {
1076 // `property.to_string()` returns the Display of the JSON
1077 // value, which for a string is `"name"` (with quotes).
1078 // Strip them for the lookup; keep them in the human message.
1079 let raw = property.to_string();
1080 let unquoted = raw
1081 .strip_prefix('"')
1082 .and_then(|s| s.strip_suffix('"'))
1083 .unwrap_or(&raw)
1084 .to_string();
1085 required_property = Some(unquoted);
1086 format!("required field missing: {property}")
1087 }
1088 jsonschema::error::ValidationErrorKind::AdditionalProperties { unexpected } => {
1089 format!("unexpected additional properties: {unexpected:?}")
1090 }
1091 jsonschema::error::ValidationErrorKind::Enum { options } => {
1092 format!("value not in allowed enum: {options}")
1093 }
1094 jsonschema::error::ValidationErrorKind::MinLength { limit } => {
1095 format!("string shorter than min length ({limit})")
1096 }
1097 jsonschema::error::ValidationErrorKind::MaxLength { limit } => {
1098 format!("string longer than max length ({limit})")
1099 }
1100 jsonschema::error::ValidationErrorKind::Minimum { limit } => {
1101 format!("value below minimum ({limit})")
1102 }
1103 jsonschema::error::ValidationErrorKind::Maximum { limit } => {
1104 format!("value above maximum ({limit})")
1105 }
1106 jsonschema::error::ValidationErrorKind::Pattern { pattern } => {
1107 format!("value did not match pattern {pattern}")
1108 }
1109 // Long tail: lean on jsonschema's Display impl, which is the
1110 // built-in human-readable error message ("X is not of type Y").
1111 // Strip trailing newlines so the JSONL line stays one line.
1112 _ => first.to_string().trim().to_string(),
1113 };
1114 // Round 30 — Srikanth on 0.3.173 asked how a deeper nested mismatch
1115 // reads. The prior output printed the WHOLE top-level schema even for
1116 // a single-field mismatch, which buried the actual constraint that
1117 // failed. Walk the instance pointer through the schema's properties
1118 // chain and print the most specific sub-schema we can find. Falls
1119 // back to the full schema for paths the walker can't resolve
1120 // (additionalProperties, oneOf, allOf, $ref un-resolved, etc.).
1121 let mut focused_schema = sub_schema_at_pointer(schema, path).unwrap_or_else(|| schema.clone());
1122 // Round 31 — for Required errors, descend one more step into
1123 // `properties[<missing>]` so the printed schema is the missing
1124 // field's own constraint, not the whole parent.
1125 if let Some(prop_name) = required_property.as_ref() {
1126 if let Some(prop_schema) =
1127 focused_schema.get("properties").and_then(|p| p.get(prop_name.as_str()))
1128 {
1129 focused_schema = prop_schema.clone();
1130 }
1131 }
1132 // Round 34 (#827) — Srikanth on 0.3.178 hit the vCenter
1133 // `enabled: boolean` case where the schema's multi-paragraph
1134 // `description` (and other prose fields) ate the 300-char budget
1135 // before the actually-useful `type` keyword could appear. Strip
1136 // the noise-fields recursively before serializing so the type
1137 // signal survives truncation; constraint keywords (`type`,
1138 // `properties`, `required`, `format`, `items`, etc.) stay.
1139 let focused_schema = strip_schema_noise(&focused_schema);
1140 let schema_str = serde_json::to_string(&focused_schema).unwrap_or_else(|_| "<schema>".into());
1141 let schema_str = if schema_str.len() > 300 {
1142 format!("{}...", &schema_str[..300])
1143 } else {
1144 schema_str
1145 };
1146 // Round 29 — Srikanth on 0.3.172 was confused by `at /:` thinking
1147 // it referenced the URL path; it's actually a JSON pointer into
1148 // the RESPONSE BODY. Reword so that's unambiguous: explicit
1149 // "response body" prefix and a human label for the root case.
1150 let location = if path == "/" {
1151 "response body root".to_string()
1152 } else {
1153 format!("response body at {path}")
1154 };
1155 Some(format!("{location}: {kind_msg}; expected schema {schema_str}"))
1156}
1157
1158/// Round 34 (#827) — drop the human-readable / documentation-only
1159/// fields from a JSON Schema before printing it inside a
1160/// `response_schema_error` message. The validator only cares about
1161/// constraint keywords (`type`, `required`, `properties`, `items`,
1162/// `format`, `enum`, `min*`/`max*`, `pattern`, `oneOf`/`anyOf`/
1163/// `allOf`/`not`); the prose fields can be paragraphs long for real-
1164/// world specs (vCenter's `enabled: bool` field has a multi-paragraph
1165/// description) and were eating the 300-char truncation budget before
1166/// the actually-useful type info could appear. Stripped fields:
1167/// `description`, `example`, `examples`, `summary`, `title`,
1168/// `externalDocs`, `xml`, `discriminator.description`.
1169fn strip_schema_noise(schema: &serde_json::Value) -> serde_json::Value {
1170 const NOISE_KEYS: &[&str] = &[
1171 "description",
1172 "example",
1173 "examples",
1174 "summary",
1175 "title",
1176 "externalDocs",
1177 "xml",
1178 ];
1179 match schema {
1180 serde_json::Value::Object(map) => {
1181 let mut out = serde_json::Map::with_capacity(map.len());
1182 for (k, v) in map {
1183 if NOISE_KEYS.contains(&k.as_str()) {
1184 continue;
1185 }
1186 out.insert(k.clone(), strip_schema_noise(v));
1187 }
1188 serde_json::Value::Object(out)
1189 }
1190 serde_json::Value::Array(items) => {
1191 serde_json::Value::Array(items.iter().map(strip_schema_noise).collect())
1192 }
1193 other => other.clone(),
1194 }
1195}
1196
1197/// Round 30 — walk a JSON-Pointer-style instance path through a JSON
1198/// Schema and return the sub-schema describing the value at that
1199/// position. For path `/name/age` on
1200/// `{"properties":{"name":{"properties":{"age":{"type":"integer"}}}}}`
1201/// returns `{"type":"integer"}`. Returns `None` for paths the walker
1202/// can't follow (array indices into `items` with no per-index schema,
1203/// `additionalProperties`, `oneOf`/`allOf`, unresolved `$ref`); callers
1204/// should fall back to the full schema in that case.
1205fn sub_schema_at_pointer(schema: &serde_json::Value, pointer: &str) -> Option<serde_json::Value> {
1206 if pointer.is_empty() || pointer == "/" {
1207 return Some(schema.clone());
1208 }
1209 let mut current = schema;
1210 for seg in pointer.trim_start_matches('/').split('/') {
1211 let unescaped = seg.replace("~1", "/").replace("~0", "~");
1212 if let Some(props) = current.get("properties") {
1213 if let Some(sub) = props.get(&unescaped) {
1214 current = sub;
1215 continue;
1216 }
1217 }
1218 if let Some(items) = current.get("items") {
1219 if items.is_object() {
1220 current = items;
1221 continue;
1222 }
1223 }
1224 return None;
1225 }
1226 Some(current.clone())
1227}
1228
/// Round 17.5 — description of a single OWASP injection probe that is
/// about to be sent for an operation.
#[derive(Clone, Debug)]
struct OwaspProbe {
    /// Self-test label for the probe, formatted `owasp:<category>`.
    label: String,
    /// Request body to send with the probe, if any.
    body: Option<String>,
    /// Query parameters to send with the probe.
    query: Vec<(String, String)>,
}
1236
1237/// Build one OWASP probe per `SecurityCategory` for `op`. Targets the
1238/// first query param if any, else the first string field of the
1239/// positive JSON body. Returns empty if neither target is available.
1240fn build_owasp_probes(op: &AnnotatedOperation) -> Vec<OwaspProbe> {
1241 use crate::security_payloads::{SecurityCategory, SecurityPayloads};
1242
1243 let categories = [
1244 SecurityCategory::SqlInjection,
1245 SecurityCategory::Xss,
1246 SecurityCategory::CommandInjection,
1247 SecurityCategory::PathTraversal,
1248 SecurityCategory::Ssti,
1249 SecurityCategory::LdapInjection,
1250 SecurityCategory::Xxe,
1251 ];
1252
1253 // Pick an injection target ONCE per operation; reuse it across
1254 // categories. (A single op gets up to 7 probes — one per category
1255 // — all attacking the same field.)
1256 let injection_target = pick_injection_target(op);
1257 let Some(target) = injection_target else {
1258 return Vec::new();
1259 };
1260
1261 let mut probes = Vec::new();
1262 for cat in categories {
1263 // Take the *first* payload from each category. The
1264 // collection's first entry is the canonical low-risk
1265 // representative; later entries include time-based / blind
1266 // probes that aren't useful as a one-shot rejection test.
1267 let Some(payload) = SecurityPayloads::get_by_category(cat).into_iter().next() else {
1268 continue;
1269 };
1270 let mut query = op.query_params.clone();
1271 let mut body = op.sample_body.clone();
1272 match &target {
1273 InjectionTarget::Query(idx) => {
1274 if let Some(slot) = query.get_mut(*idx) {
1275 slot.1 = payload.payload.clone();
1276 }
1277 }
1278 InjectionTarget::BodyStringField(field) => {
1279 body = inject_into_body_field(body.as_deref(), field, &payload.payload);
1280 }
1281 }
1282 probes.push(OwaspProbe {
1283 label: format!("owasp:{}", cat),
1284 body,
1285 query,
1286 });
1287 }
1288 probes
1289}
1290
/// Where an OWASP payload gets injected for an operation.
#[derive(Clone, Debug)]
enum InjectionTarget {
    /// Overwrite the value of the query parameter at this index.
    Query(usize),
    /// Overwrite the named top-level field of the JSON request body.
    BodyStringField(String),
}
1296
1297fn pick_injection_target(op: &AnnotatedOperation) -> Option<InjectionTarget> {
1298 if !op.query_params.is_empty() {
1299 return Some(InjectionTarget::Query(0));
1300 }
1301 let sample = op.sample_body.as_deref()?;
1302 let parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
1303 let obj = parsed.as_object()?;
1304 for (k, v) in obj {
1305 if v.is_string() {
1306 return Some(InjectionTarget::BodyStringField(k.clone()));
1307 }
1308 }
1309 None
1310}
1311
1312/// Replace the value of `field` in a JSON-object body with `payload`.
1313/// Returns the mutated body as a JSON string. Returns `None` if the
1314/// body doesn't parse as a JSON object.
1315fn inject_into_body_field(body: Option<&str>, field: &str, payload: &str) -> Option<String> {
1316 let raw = body?;
1317 let mut parsed: serde_json::Value = serde_json::from_str(raw).ok()?;
1318 let obj = parsed.as_object_mut()?;
1319 obj.insert(field.to_string(), serde_json::json!(payload));
1320 serde_json::to_string(&parsed).ok()
1321}
1322
/// Round 17.3 — one synthesised bad credential to send.
///
/// A probe carries only the credential material; the caller merges
/// `headers` and `query` into a request from which the real
/// credentials have already been stripped.
#[derive(Debug, Clone)]
struct SecurityProbe {
    /// Self-test label, e.g. `security:bad-bearer`.
    label: String,
    /// Headers to attach to the probe request.
    headers: Vec<(String, String)>,
    /// Query parameters to attach (API key in query case).
    query: Vec<(String, String)>,
}
1334
1335/// For each declared security scheme, produce one bad-credential
1336/// probe plus a single "no auth at all" probe that exercises the
1337/// missing-credential code path. Deduplicates by scheme kind so an
1338/// operation declaring `[bearer, bearer]` only yields one Bearer
1339/// probe.
1340fn build_security_probes(schemes: &[SecuritySchemeInfo]) -> Vec<SecurityProbe> {
1341 if schemes.is_empty() {
1342 return Vec::new();
1343 }
1344 let mut probes: Vec<SecurityProbe> = Vec::new();
1345 let mut seen_bearer = false;
1346 let mut seen_basic = false;
1347 // `(loc_tag, name)` — ApiKeyLocation doesn't implement Ord, so
1348 // we tag it with a short discriminant string for dedup.
1349 let mut seen_apikey: std::collections::BTreeSet<(&'static str, String)> = Default::default();
1350 for s in schemes {
1351 match s {
1352 SecuritySchemeInfo::Bearer if !seen_bearer => {
1353 seen_bearer = true;
1354 probes.push(SecurityProbe {
1355 label: "security:bad-bearer".into(),
1356 headers: vec![(
1357 "Authorization".into(),
1358 "Bearer self-test-invalid-token".into(),
1359 )],
1360 query: Vec::new(),
1361 });
1362 }
1363 SecuritySchemeInfo::Basic if !seen_basic => {
1364 seen_basic = true;
1365 // base64("self-test:invalid") — valid base64, wrong creds.
1366 probes.push(SecurityProbe {
1367 label: "security:bad-basic".into(),
1368 headers: vec![(
1369 "Authorization".into(),
1370 "Basic c2VsZi10ZXN0OmludmFsaWQ=".into(),
1371 )],
1372 query: Vec::new(),
1373 });
1374 }
1375 SecuritySchemeInfo::ApiKey { location, name } => {
1376 let loc_tag = match location {
1377 ApiKeyLocation::Header => "header",
1378 ApiKeyLocation::Query => "query",
1379 ApiKeyLocation::Cookie => "cookie",
1380 };
1381 if seen_apikey.contains(&(loc_tag, name.clone())) {
1382 continue;
1383 }
1384 seen_apikey.insert((loc_tag, name.clone()));
1385 let label = format!("security:bad-apikey:{}", name);
1386 let bad = "self-test-invalid-key".to_string();
1387 match location {
1388 ApiKeyLocation::Header => probes.push(SecurityProbe {
1389 label,
1390 headers: vec![(name.clone(), bad)],
1391 query: Vec::new(),
1392 }),
1393 ApiKeyLocation::Query => probes.push(SecurityProbe {
1394 label,
1395 headers: Vec::new(),
1396 query: vec![(name.clone(), bad)],
1397 }),
1398 ApiKeyLocation::Cookie => probes.push(SecurityProbe {
1399 label,
1400 headers: vec![("Cookie".into(), format!("{}={}", name, bad))],
1401 query: Vec::new(),
1402 }),
1403 }
1404 }
1405 _ => {}
1406 }
1407 }
1408 // Always add a "no auth at all" probe when *any* security scheme
1409 // is declared — useful even if all schemes failed to resolve to a
1410 // testable kind, because it surfaces validators that aren't
1411 // checking auth presence at all.
1412 probes.push(SecurityProbe {
1413 label: "security:no-auth".into(),
1414 headers: Vec::new(),
1415 query: Vec::new(),
1416 });
1417 probes
1418}
1419
1420/// Remove Authorization and any API-key headers declared by the
1421/// operation's security schemes from `headers`, so a security probe
1422/// can supply its own credential (or none) cleanly.
1423fn strip_auth(
1424 headers: &[(String, String)],
1425 schemes: &[SecuritySchemeInfo],
1426) -> Vec<(String, String)> {
1427 let mut apikey_headers: std::collections::BTreeSet<String> = Default::default();
1428 for s in schemes {
1429 if let SecuritySchemeInfo::ApiKey {
1430 location: ApiKeyLocation::Header,
1431 name,
1432 } = s
1433 {
1434 apikey_headers.insert(name.to_lowercase());
1435 }
1436 if let SecuritySchemeInfo::ApiKey {
1437 location: ApiKeyLocation::Cookie,
1438 ..
1439 } = s
1440 {
1441 apikey_headers.insert("cookie".into());
1442 }
1443 }
1444 headers
1445 .iter()
1446 .filter(|(k, _)| {
1447 let lk = k.to_lowercase();
1448 lk != "authorization" && !apikey_headers.contains(&lk)
1449 })
1450 .cloned()
1451 .collect()
1452}
1453
1454/// Remove API-key query parameters declared by the operation's
1455/// security schemes from `query`, so a probe can supply its own.
1456fn strip_auth_query(
1457 query: &[(String, String)],
1458 schemes: &[SecuritySchemeInfo],
1459) -> Vec<(String, String)> {
1460 let mut apikey_query: std::collections::BTreeSet<String> = Default::default();
1461 for s in schemes {
1462 if let SecuritySchemeInfo::ApiKey {
1463 location: ApiKeyLocation::Query,
1464 name,
1465 } = s
1466 {
1467 apikey_query.insert(name.clone());
1468 }
1469 }
1470 query.iter().filter(|(k, _)| !apikey_query.contains(k)).cloned().collect()
1471}
1472
/// Round 35 (#859) — which response-status class a probe counts as a
/// pass.
///
/// Srikanth on 0.3.179: embedded-content variant-b probes were
/// flagging well-behaved 4xx responses as mismatches, when the only
/// bug those probes are meant to find is a 5xx (the server CRASHED
/// while parsing the embedded XML/YAML/multipart/urlencoded payload).
/// This tristate replaces the older `expected_4xx: bool` so variant-b
/// probes can opt into "anything but 5xx is fine".
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ExpectedOutcome {
    /// Positive probe: spec-compliant request, expect 2xx or 3xx.
    Success,
    /// Negative probe: invalid request, expect 4xx.
    ClientError,
    /// Embedded-content variant-b probe: spec-shape envelope with a
    /// non-JSON payload embedded in the first string field. Any
    /// response that isn't a 5xx is fine; the probe exists to catch
    /// server crashes on the embedded payload.
    NotServerError,
}

impl ExpectedOutcome {
    /// Whether `actual_status` counts as a pass for this outcome.
    ///
    /// Statuses below 200 never pass for any outcome.
    fn passes(self, actual_status: u16) -> bool {
        match self {
            Self::Success => matches!(actual_status, 200..=399),
            Self::ClientError => matches!(actual_status, 400..=499),
            Self::NotServerError => !matches!(actual_status, 0..=199 | 500..=599),
        }
    }

    /// Human-readable expected-range hint. Persisted in the JSONL
    /// capture, used by the HTML viewer's "show mismatches only"
    /// filter, and what users `jq` against.
    fn as_str(self) -> &'static str {
        match self {
            Self::Success => "2xx-3xx",
            Self::ClientError => "4xx",
            Self::NotServerError => "2xx-4xx",
        }
    }
}
1515
1516/// Variant of `send_case` that takes an explicit `extra_headers`
1517/// (rather than reading them from `config`). Used by security probes
1518/// to substitute or strip the configured Authorization header.
1519#[allow(clippy::too_many_arguments)]
1520async fn send_case_with_extra(
1521 client: &Client,
1522 config: &SelfTestConfig,
1523 method: Method,
1524 url: &str,
1525 label: &str,
1526 expected: ExpectedOutcome,
1527 body: Option<&str>,
1528 query: Vec<(String, String)>,
1529 headers: Vec<(String, String)>,
1530 extra_headers: Vec<(String, String)>,
1531 // Round 33 (#823) — spec path template (e.g. `/users/{id}`)
1532 // for the operation this probe belongs to. Stamped on the
1533 // capture so the per-endpoint summary can group by template.
1534 path_template: &str,
1535) -> CaseOutcome {
1536 let mut req = client.request(method.clone(), url);
1537 let mut capture_headers: BTreeMap<String, String> = BTreeMap::new();
1538 for (k, v) in &query {
1539 req = req.query(&[(k.as_str(), v.as_str())]);
1540 }
1541 // Round 36 (#876) — stamp the client side first so the same
1542 // `client_sent_at` string flows into both the request headers
1543 // (so the server-side `ServerConformanceViolation` records it
1544 // verbatim) and the on-disk `CaseCapture` JSONL line. Don't
1545 // re-call `Utc::now()` after `req.send()` — that would record
1546 // a different timestamp than the server sees.
1547 let mockforge_version = env!("CARGO_PKG_VERSION").to_string();
1548 let client_sent_at = chrono::Utc::now().to_rfc3339();
1549 // Round 28 — reqwest's `.header(k, v)` APPENDS rather than replaces
1550 // (.headers().insert() would replace but isn't on the builder).
1551 // The previous round-25 fix relied on "last-write-wins" semantics
1552 // that don't exist; for content-type-swap probes the request went
1553 // out with BOTH `Content-Type: application/json` AND `Content-Type:
1554 // application/xml`, and axum's `Json<>` extractor picked the JSON
1555 // one and accepted, so the server-side validator never saw the
1556 // mismatch. Build a `HeaderMap` ourselves so the override
1557 // replaces the body-block default exactly once.
1558 let mut final_headers: reqwest::header::HeaderMap = reqwest::header::HeaderMap::new();
1559 if let Some(_b) = body {
1560 if let Ok(v) = reqwest::header::HeaderValue::from_str("application/json") {
1561 final_headers.insert(reqwest::header::CONTENT_TYPE, v);
1562 }
1563 capture_headers.insert("Content-Type".to_string(), "application/json".to_string());
1564 }
1565 for (k, v) in &headers {
1566 if let (Ok(hn), Ok(hv)) = (
1567 reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1568 reqwest::header::HeaderValue::from_str(v),
1569 ) {
1570 final_headers.insert(hn, hv);
1571 }
1572 capture_headers.insert(k.clone(), v.clone());
1573 }
1574 for (k, v) in &extra_headers {
1575 if let (Ok(hn), Ok(hv)) = (
1576 reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1577 reqwest::header::HeaderValue::from_str(v),
1578 ) {
1579 final_headers.insert(hn, hv);
1580 }
1581 capture_headers.insert(k.clone(), v.clone());
1582 }
1583 // Round 36 (#876) — outbound client stamps. Inserted last so
1584 // they can't be clobbered by user-supplied extra-headers, and
1585 // recorded in `capture_headers` so the JSONL line shows the
1586 // exact bytes that went on the wire.
1587 {
1588 let v_header = mockforge_foundation::conformance_violations::CLIENT_VERSION_HEADER;
1589 let s_header = mockforge_foundation::conformance_violations::CLIENT_SENT_AT_HEADER;
1590 if let (Ok(hn), Ok(hv)) = (
1591 reqwest::header::HeaderName::from_bytes(v_header.as_bytes()),
1592 reqwest::header::HeaderValue::from_str(&mockforge_version),
1593 ) {
1594 final_headers.insert(hn, hv);
1595 }
1596 if let (Ok(hn), Ok(hv)) = (
1597 reqwest::header::HeaderName::from_bytes(s_header.as_bytes()),
1598 reqwest::header::HeaderValue::from_str(&client_sent_at),
1599 ) {
1600 final_headers.insert(hn, hv);
1601 }
1602 capture_headers.insert(v_header.to_string(), mockforge_version.clone());
1603 capture_headers.insert(s_header.to_string(), client_sent_at.clone());
1604 }
1605 if let Some(b) = body {
1606 req = req.body(b.to_string());
1607 }
1608 req = req.headers(final_headers);
1609 let (actual_status, response_capture) = match req.send().await {
1610 Ok(resp) => {
1611 let status = resp.status().as_u16();
1612 if let Some(sink) = &config.capture {
1613 let resp_headers: BTreeMap<String, String> = resp
1614 .headers()
1615 .iter()
1616 .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string()))
1617 .collect();
1618 let text = resp.text().await.unwrap_or_default();
1619 let (rb, truncated) = truncate_body_for_capture(&text);
1620 (status, Some((Some((rb, truncated)), resp_headers, None, sink.clone())))
1621 } else {
1622 (status, None)
1623 }
1624 }
1625 Err(e) => {
1626 let err_str = e.to_string();
1627 if let Some(sink) = &config.capture {
1628 (0, Some((None, BTreeMap::new(), Some(err_str), sink.clone())))
1629 } else {
1630 (0, None)
1631 }
1632 }
1633 };
1634 let passed = expected.passes(actual_status);
1635 if let Some((resp_body, resp_headers, error, sink)) = response_capture {
1636 let (request_body, request_body_truncated) = match body {
1637 Some(b) => {
1638 let (rb, t) = truncate_body_for_capture(b);
1639 (Some(rb), t)
1640 }
1641 None => (None, false),
1642 };
1643 let (response_body, response_body_truncated) = match resp_body {
1644 Some((rb, t)) => (Some(rb), t),
1645 None => (None, false),
1646 };
1647 let entry = CaseCapture {
1648 label: label.to_string(),
1649 method: method.to_string(),
1650 url: build_query_url(url, &query),
1651 request_headers: capture_headers,
1652 request_body,
1653 request_body_truncated,
1654 response_status: actual_status,
1655 response_headers: resp_headers,
1656 response_body,
1657 response_body_truncated,
1658 error,
1659 // Filled in by the per-operation validation pass after
1660 // every probe finishes; the capture itself is unaware of
1661 // the schema map.
1662 response_schema_error: None,
1663 // Round 28 — derive the expected range from the probe's
1664 // outcome shape so the JSONL line and HTML viewer can
1665 // filter mismatches without re-deriving on the read side.
1666 // Round 35 (#859) — add a third value `"2xx-4xx"` for
1667 // embedded-content variant-b probes whose only failure
1668 // mode is a 5xx server crash.
1669 expected_status_range: expected.as_str().to_string(),
1670 // Round 33 (#823) — path_template carries the spec's
1671 // pre-substitution path so the per-endpoint summary can
1672 // collapse `/users/X` and `/users/Y` into one row.
1673 // spec_label is constant per run, read from the config.
1674 path_template: path_template.to_string(),
1675 spec_label: config.spec_label.clone(),
1676 // Round 36 (#876) — same values that went on the wire as
1677 // request headers, so a server-side
1678 // `ServerConformanceViolation` recorded with
1679 // `client_mockforge_version` + `client_sent_at` matches
1680 // the JSONL line byte-for-byte.
1681 mockforge_version: mockforge_version.clone(),
1682 client_sent_at: client_sent_at.clone(),
1683 };
1684 if let Ok(mut guard) = sink.lock() {
1685 guard.push(entry);
1686 }
1687 }
1688 // Round 35 (#859) — keep the `expected_4xx` field on `CaseOutcome`
1689 // semantically tied to "negative probe expecting 400-class", so
1690 // downstream code in `report_html.rs` doesn't have to learn about
1691 // the new tristate. `NotServerError` reports as `expected_4xx:
1692 // false` (it's a positive probe in spirit) and instead carries
1693 // its outcome through the per-capture `expected_status_range`.
1694 let expected_4xx = matches!(expected, ExpectedOutcome::ClientError);
1695 CaseOutcome {
1696 label: label.to_string(),
1697 expected_4xx,
1698 actual_status,
1699 passed,
1700 }
1701}
1702
1703// HTTP request shape needs all of these: client, config (for capture
1704// sink + extra headers), method, url, label (probe id), expected_4xx
1705// (pass/fail decision), body, query, headers. A struct wrapper would
1706// just move the arity from positional to field access without making
1707// the call sites clearer.
1708#[allow(clippy::too_many_arguments)]
1709async fn send_case(
1710 client: &Client,
1711 config: &SelfTestConfig,
1712 method: Method,
1713 url: &str,
1714 label: &str,
1715 expected: ExpectedOutcome,
1716 body: Option<&str>,
1717 query: Vec<(String, String)>,
1718 headers: Vec<(String, String)>,
1719 path_template: &str,
1720) -> CaseOutcome {
1721 // Forwarding to `send_case_with_extra` keeps the capture logic in
1722 // one place so request/response tracing can't drift between the
1723 // two entrypoints.
1724 send_case_with_extra(
1725 client,
1726 config,
1727 method,
1728 url,
1729 label,
1730 expected,
1731 body,
1732 query,
1733 headers,
1734 config.extra_headers.clone(),
1735 path_template,
1736 )
1737 .await
1738}
1739
1740/// Round 23 (c-iii) — rebuild the query-stringified URL for capture so
1741/// the JSONL trace shows the URL that actually went over the wire
1742/// (reqwest applies `.query(..)` after the request URL string is
1743/// rendered, so capturing the raw `url` argument alone loses the
1744/// query params).
1745fn build_query_url(base: &str, query: &[(String, String)]) -> String {
1746 if query.is_empty() {
1747 return base.to_string();
1748 }
1749 let qs: String = query
1750 .iter()
1751 .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
1752 .collect::<Vec<_>>()
1753 .join("&");
1754 if base.contains('?') {
1755 format!("{base}&{qs}")
1756 } else {
1757 format!("{base}?{qs}")
1758 }
1759}
1760
1761/// Substitute `{param}` placeholders in the spec path with their
1762/// sample values from `path_params`, then prepend `target_url`. Empty
1763/// values are kept as `{param}` so an upstream router still matches
1764/// the template — useful when `path_params` is empty and we want to
1765/// hit the same route the spec defines.
1766///
1767/// All current call sites went through `build_url_with_base` after
1768/// round 18.1, so this no-base-path helper is unused; keep it as the
1769/// documented shim for future external callers (one-arg simplification).
1770#[allow(dead_code)]
1771fn build_url(target: &str, path_template: &str, path_params: &[(String, String)]) -> String {
1772 build_url_with_base(target, None, path_template, path_params)
1773}
1774
/// Round 18.1 — variant of `build_url` that takes a `base_path`
/// (e.g. `Some("/api")`). When set, it is inserted between the target
/// and the spec path, so a spec declaring `/users` against a target
/// served behind `/api` resolves to `<target>/api/users`.
///
/// * `target` — scheme + authority of the server; trailing `/` is
///   stripped before joining.
/// * `base_path` — optional prefix. It is normalised: a missing leading
///   `/` is added and trailing `/` is stripped. `None`, `""` and
///   slash-only values such as `"/"` all mean "no prefix" (a slash-only
///   value used to produce a doubled `//` in the result).
/// * `path_template` — the spec path; a leading `/` is added if absent.
/// * `path_params` — `(name, sample)` pairs. Every `{name}` placeholder
///   is replaced by its sample; an empty sample leaves the placeholder
///   in place.
///
/// Returns `<target><prefix><path>` as an owned string.
fn build_url_with_base(
    target: &str,
    base_path: Option<&str>,
    path_template: &str,
    path_params: &[(String, String)],
) -> String {
    let mut url = path_template.to_string();
    for (name, value) in path_params {
        // An empty sample keeps the literal `{name}` so the request still
        // targets the templated route.
        if !value.is_empty() {
            url = url.replace(&format!("{{{name}}}"), value);
        }
    }
    let target = target.trim_end_matches('/');
    let prefix = match base_path.map(|bp| bp.trim_end_matches('/')) {
        // Nothing left after trimming: do not emit a bare "/" prefix, it
        // would turn `<target>/x` into `<target>//x`.
        None | Some("") => String::new(),
        Some(bp) if bp.starts_with('/') => bp.to_string(),
        Some(bp) => format!("/{bp}"),
    };
    let path = if url.starts_with('/') {
        url
    } else {
        format!("/{url}")
    };
    format!("{target}{prefix}{path}")
}
1812
1813#[cfg(test)]
1814mod tests {
1815 use super::*;
1816
1817 fn op(
1818 method: &str,
1819 path: &str,
1820 body: Option<&str>,
1821 query: Vec<(&str, &str)>,
1822 headers: Vec<(&str, &str)>,
1823 path_params: Vec<(&str, &str)>,
1824 ) -> AnnotatedOperation {
1825 AnnotatedOperation {
1826 method: method.into(),
1827 path: path.into(),
1828 features: Vec::new(),
1829 request_body_content_type: body.map(|_| "application/json".into()),
1830 sample_body: body.map(|s| s.to_string()),
1831 query_params: query.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1832 header_params: headers.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1833 path_params: path_params.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1834 response_schema: None,
1835 response_schemas: std::collections::BTreeMap::new(),
1836 request_body_schema: None,
1837 security_schemes: Vec::new(),
1838 }
1839 }
1840
/// Round 36 (#876) — a JSONL line written before the stamp fields
/// existed (no `mockforge_version`, no `client_sent_at`) must still
/// deserialise, with both fields defaulting to the empty string. Guards
/// against a back-compat regression the next time `CaseCapture` grows.
#[test]
fn case_capture_back_compat_when_stamp_fields_missing() {
    let legacy_line = serde_json::json!({
        "label": "positive",
        "method": "GET",
        "url": "http://api/users",
        "request_headers": {},
        "request_body_truncated": false,
        "response_status": 200,
        "response_headers": {},
        "response_body_truncated": false,
    });
    let parsed = serde_json::from_value::<CaseCapture>(legacy_line)
        .expect("pre-r36 payload must deserialise");
    let CaseCapture {
        mockforge_version,
        client_sent_at,
        ..
    } = &parsed;
    assert!(mockforge_version.is_empty(), "default to empty");
    assert!(client_sent_at.is_empty(), "default to empty");
}
1862
/// Round 36 (#876) — the happy path: stamp fields set by the bench
/// survive a serialise → deserialise round trip unchanged. Also pins
/// the exact on-wire key/value text so tooling that greps
/// `mockforge_version` out of the JSONL keeps working.
#[test]
fn case_capture_stamps_round_trip_through_serde() {
    let version = "0.3.183";
    let sent_at = "2026-06-17T12:34:56+00:00";
    let original = CaseCapture {
        label: "positive".into(),
        method: "GET".into(),
        url: "http://api/users".into(),
        request_headers: BTreeMap::new(),
        request_body: None,
        request_body_truncated: false,
        response_status: 200,
        response_headers: BTreeMap::new(),
        response_body: None,
        response_body_truncated: false,
        error: None,
        response_schema_error: None,
        expected_status_range: "2xx-3xx".into(),
        path_template: "/users".into(),
        spec_label: None,
        mockforge_version: version.into(),
        client_sent_at: sent_at.into(),
    };
    let encoded = serde_json::to_string(&original).unwrap();
    // Exact serialised fragments, including quoting and the absence of
    // whitespace around the colon.
    for needle in [
        r#""mockforge_version":"0.3.183""#,
        r#""client_sent_at":"2026-06-17T12:34:56+00:00""#,
    ] {
        assert!(encoded.contains(needle));
    }
    let decoded: CaseCapture = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.mockforge_version, version);
    assert_eq!(decoded.client_sent_at, sent_at);
}
1895
/// Every `{placeholder}` with a non-empty sample is replaced, and the
/// target's trailing slash does not produce a doubled `/`.
#[test]
fn build_url_substitutes_path_params() {
    let samples = [
        (String::from("id"), String::from("42")),
        (String::from("pid"), String::from("7")),
    ];
    let rendered = build_url("https://api.test/", "/users/{id}/posts/{pid}", &samples);
    assert_eq!(rendered, "https://api.test/users/42/posts/7");
}
1905
/// Round 18.1 — when every positive probe fails with the same status
/// (here: fifty 404s), the report should flag a likely target
/// misconfiguration instead of treating the run as an ordinary
/// conformance result.
#[test]
fn detect_target_misconfiguration_when_all_positives_share_status() {
    let not_found_positive = |i: i32| OperationResult {
        method: "GET".into(),
        path: format!("/r/{i}"),
        positive: Some(CaseOutcome {
            label: "positive".into(),
            expected_4xx: false,
            actual_status: 404,
            passed: false,
        }),
        negatives: Vec::new(),
    };
    let mut report = SelfTestReport {
        positive_pass: 0,
        positive_fail: 50,
        ..Default::default()
    };
    report.operations.extend((0..50).map(not_found_positive));
    assert_eq!(report.detect_target_misconfiguration(), Some(404));
}
1931
/// Counter-case: with 5 passing positives next to 50 failing ones the
/// run is not flagged as a target misconfiguration.
#[test]
fn detect_target_misconfiguration_returns_none_when_some_pass() {
    let mut report = SelfTestReport {
        positive_pass: 5,
        positive_fail: 50,
        ..Default::default()
    };
    report.operations.extend((0..55).map(|i| {
        // The first five operations succeed, the rest 404.
        let healthy = i < 5;
        OperationResult {
            method: "GET".into(),
            path: format!("/r/{i}"),
            positive: Some(CaseOutcome {
                label: "positive".into(),
                expected_4xx: false,
                actual_status: if healthy { 200 } else { 404 },
                passed: healthy,
            }),
            negatives: Vec::new(),
        }
    }));
    assert_eq!(report.detect_target_misconfiguration(), None);
}
1954
/// Round 18.1 — `--base-path /api` must be prepended to every spec
/// path. Before the fix the self-test ignored the base path, so every
/// positive 404'd when the deployed API sat behind a path prefix.
#[test]
fn build_url_applies_base_path_when_present() {
    let samples = [(String::from("id"), String::from("42"))];
    let rendered =
        build_url_with_base("https://api.example.com", Some("/api"), "/users/{id}", &samples);
    assert_eq!(rendered, "https://api.example.com/api/users/42");
}
1969
/// Round 18.1 — base-path normalisation: a missing leading slash is
/// added, a trailing slash is stripped, and the empty string behaves
/// exactly like `None`.
#[test]
fn build_url_normalises_base_path() {
    // (base_path, expected URL) for target `https://t` and path `/x`.
    let cases: [(Option<&str>, &str); 4] = [
        (Some("api"), "https://t/api/x"),
        (Some("/api/"), "https://t/api/x"),
        (Some(""), "https://t/x"),
        (None, "https://t/x"),
    ];
    for (base_path, expected) in cases {
        let rendered = build_url_with_base("https://t", base_path, "/x", &[]);
        assert_eq!(rendered, expected);
    }
}
1984
/// With no sample values at all, the `{id}` placeholder is passed
/// through verbatim.
#[test]
fn build_url_keeps_placeholders_when_no_sample() {
    let no_samples: &[(String, String)] = &[];
    let rendered = build_url("https://api.test", "/users/{id}", no_samples);
    assert_eq!(rendered, "https://api.test/users/{id}");
}
1990
/// A report with one missed negative renders the per-category counts,
/// shows the warning marker, and does not count as all-passed.
#[test]
fn report_summary_calls_out_misses() {
    let mut caught = BTreeMap::new();
    caught.insert("request-body".into(), 2);
    let mut missed = BTreeMap::new();
    missed.insert("request-body".into(), 1);
    let report = SelfTestReport {
        positive_pass: 3,
        positive_fail: 0,
        negative_caught: caught,
        negative_missed: missed,
        operations: Vec::new(),
    };
    let summary = report.render_summary();
    for fragment in [
        "Positives: 3 pass / 0 fail",
        "Negatives [request-body]: 2 caught / 1 missed",
        "⚠",
    ] {
        assert!(summary.contains(fragment));
    }
    assert!(!report.all_passed());
}
2006
/// With no failed positives and no missed negatives the report is
/// all-passed and the summary carries the check mark.
#[test]
fn report_all_passed_when_no_miss() {
    let mut caught = BTreeMap::new();
    caught.insert("parameters".into(), 3);
    let report = SelfTestReport {
        positive_pass: 5,
        positive_fail: 0,
        negative_caught: caught,
        negative_missed: BTreeMap::new(),
        operations: Vec::new(),
    };
    assert!(report.all_passed());
    let summary = report.render_summary();
    assert!(summary.contains("✓"));
}
2019
2020 #[tokio::test]
2021 async fn run_self_test_against_unreachable_target_marks_all_failed() {
2022 // Use an obviously-dead port so we exercise the timeout/error
2023 // path without needing a live server in tests.
2024 let cfg = SelfTestConfig {
2025 target_url: "http://127.0.0.1:1".into(),
2026 timeout: Duration::from_millis(200),
2027 ..Default::default()
2028 };
2029 let ops = vec![op(
2030 "POST",
2031 "/users",
2032 Some("{\"name\":\"a\"}"),
2033 vec![],
2034 vec![],
2035 vec![],
2036 )];
2037 let report = run_self_test(&ops, &cfg).await.expect("client builds");
2038 // All cases hit the connect-error path → actual_status=0.
2039 // Positive expects 2xx-3xx → 0 is fail. Negatives expect 4xx
2040 // → 0 is also fail (we missed catching).
2041 assert_eq!(report.positive_fail, 1);
2042 assert!(report.negative_missed.values().sum::<usize>() >= 1);
2043 assert!(!report.all_passed());
2044 }
2045
2046 /// Round 17.2 — operations with both a positive sample AND a
2047 /// resolved request-body schema produce schema-driven negatives
2048 /// in addition to the spec-agnostic empty/wrong-type ones. The
2049 /// labels carry the field path so a per-category report can tell
2050 /// you exactly which field caught.
2051 #[tokio::test]
2052 async fn schema_driven_negatives_fire_when_schema_present() {
2053 use openapiv3::{ObjectType, ReferenceOr, Schema, SchemaData, SchemaKind, Type};
2054 let cfg = SelfTestConfig {
2055 target_url: "http://127.0.0.1:1".into(),
2056 timeout: Duration::from_millis(200),
2057 ..Default::default()
2058 };
2059 // Build an operation whose schema has a required `name` string
2060 // and an `age` integer. The mutator should produce, at
2061 // minimum: required-removed:name, required-removed:age,
2062 // type-mismatch:name, type-mismatch:age, integer-as-float:age,
2063 // plus the root-level type-mismatch.
2064 let mut obj = ObjectType::default();
2065 obj.properties.insert(
2066 "name".to_string(),
2067 ReferenceOr::Item(Box::new(Schema {
2068 schema_data: SchemaData::default(),
2069 schema_kind: SchemaKind::Type(Type::String(Default::default())),
2070 })),
2071 );
2072 obj.properties.insert(
2073 "age".to_string(),
2074 ReferenceOr::Item(Box::new(Schema {
2075 schema_data: SchemaData::default(),
2076 schema_kind: SchemaKind::Type(Type::Integer(Default::default())),
2077 })),
2078 );
2079 obj.required = vec!["name".into(), "age".into()];
2080 let schema = Schema {
2081 schema_data: SchemaData::default(),
2082 schema_kind: SchemaKind::Type(Type::Object(obj)),
2083 };
2084
2085 let mut o =
2086 op("POST", "/users", Some(r#"{"name":"Ada","age":30}"#), vec![], vec![], vec![]);
2087 o.request_body_schema = Some(schema);
2088 let report = run_self_test(&[o], &cfg).await.expect("client builds");
2089 // Bucket labels from the operation result.
2090 let labels: std::collections::BTreeSet<String> = report
2091 .operations
2092 .iter()
2093 .flat_map(|op| op.negatives.iter().map(|n| n.label.clone()))
2094 .collect();
2095 assert!(
2096 labels.iter().any(|l| l.starts_with("request-body:type-mismatch:")),
2097 "missing type-mismatch negative; got {labels:?}"
2098 );
2099 assert!(
2100 labels.iter().any(|l| l.starts_with("request-body:required-removed:")),
2101 "missing required-removed negative; got {labels:?}"
2102 );
2103 assert!(
2104 labels.iter().any(|l| l == "parameters:uri-too-long"),
2105 "missing URI-length negative; got {labels:?}"
2106 );
2107 }
2108
2109 /// Round 16 — operations with a body OR a path-param now produce
2110 /// negatives even without a sample body. Previously a POST whose
2111 /// body annotator failed produced *zero* negatives, so the self-test
2112 /// always reported "all passing" for that endpoint.
2113 #[tokio::test]
2114 async fn no_sample_body_still_produces_request_body_negatives() {
2115 let cfg = SelfTestConfig {
2116 target_url: "http://127.0.0.1:1".into(),
2117 timeout: Duration::from_millis(200),
2118 ..Default::default()
2119 };
2120 // POST with a body content type but no sample (annotator gap).
2121 let ops = vec![op("POST", "/x", None, vec![], vec![], vec![])];
2122 // No sample_body but request_body_content_type set:
2123 let mut ops_fixed = ops;
2124 ops_fixed[0].request_body_content_type = Some("application/json".into());
2125 let report = run_self_test(&ops_fixed, &cfg).await.expect("client builds");
2126 // Both request-body negatives (empty + wrong-type) should fire,
2127 // landing in `negative_missed` because the unreachable target
2128 // returns no 4xx. The point: count > 0.
2129 assert!(
2130 report.negative_missed.values().sum::<usize>() >= 2,
2131 "expected ≥2 request-body negatives, got {:?}",
2132 report.negative_missed
2133 );
2134 }
2135
2136 /// Round 16 — operations with a path-param now get a probe even
2137 /// when there's no body / required query / required header.
2138 /// Previously `/teams/{team-id}` with no other required fields
2139 /// produced zero negatives → always "all passing".
2140 #[tokio::test]
2141 async fn path_param_only_endpoint_produces_a_probe() {
2142 let cfg = SelfTestConfig {
2143 target_url: "http://127.0.0.1:1".into(),
2144 timeout: Duration::from_millis(200),
2145 ..Default::default()
2146 };
2147 let ops = vec![op(
2148 "GET",
2149 "/teams/{team-id}",
2150 None,
2151 vec![],
2152 vec![],
2153 vec![("team-id", "1")],
2154 )];
2155 let report = run_self_test(&ops, &cfg).await.expect("client builds");
2156 let total: usize = report.negative_caught.values().sum::<usize>()
2157 + report.negative_missed.values().sum::<usize>();
2158 assert!(total >= 1, "expected ≥1 path-param probe, got {:?}", report);
2159 }
2160
/// Round 18.5 — with a geo IP set, each default forwarded-IP header
/// (X-Forwarded-For, True-Client-IP, CF-Connecting-IP) is added next
/// to the operation's own headers, all carrying that IP.
#[test]
fn effective_op_headers_appends_geo_ip_to_default_headers() {
    let geo_ip: IpAddr = "203.0.113.42".parse().unwrap();
    let spec_headers = [(String::from("Accept"), String::from("application/json"))];
    let headers = effective_op_headers(&spec_headers, Some(geo_ip), &default_geo_source_headers());

    for expected in ["Accept", "X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"] {
        assert!(headers.iter().any(|(name, _)| name == expected));
    }
    // Everything except the spec's own `Accept` is a geo header and
    // must carry the same IP value.
    for (_, value) in headers.iter().filter(|(name, _)| name != "Accept") {
        assert_eq!(value, "203.0.113.42");
    }
}
2184
/// Round 18.5 — if the operation already declares a forwarded-IP
/// header (rare but legal; some specs hard-code one), its declared
/// value is kept and no second, overriding row is added. The match is
/// case-insensitive: the spec's lower-case `x-forwarded-for` blocks
/// the configured `X-Forwarded-For`.
#[test]
fn effective_op_headers_respects_spec_declared_header() {
    let geo_ip: IpAddr = "203.0.113.99".parse().unwrap();
    let declared = [(String::from("x-forwarded-for"), String::from("10.0.0.1"))];
    let headers =
        effective_op_headers(&declared, Some(geo_ip), &["X-Forwarded-For".to_string()]);

    let forwarded_for: Vec<&str> = headers
        .iter()
        .filter_map(|(name, value)| {
            name.eq_ignore_ascii_case("x-forwarded-for").then(|| value.as_str())
        })
        .collect();
    assert_eq!(forwarded_for, vec!["10.0.0.1"]);
}
2205
/// Round 18.5 — the headers come back unchanged when either
/// ingredient is missing: no geo IP, or an empty list of header names.
#[test]
fn effective_op_headers_is_a_noop_without_geo_ip() {
    let spec_headers = vec![("Accept".into(), "json".into())];

    let without_ip = effective_op_headers(&spec_headers, None, &default_geo_source_headers());
    assert_eq!(without_ip, spec_headers);

    let geo_ip: IpAddr = "10.0.0.1".parse().unwrap();
    let without_header_names = effective_op_headers(&spec_headers, Some(geo_ip), &[]);
    assert_eq!(without_header_names, spec_headers);
}
2216
/// Round 18.5 — an empty `source_ips` yields a single default client;
/// a non-empty list yields one client per IP, each attempting to bind.
/// Real binds can't be tested reliably on CI (no loopback aliases),
/// but a loopback IP is always bind-able, so that is the one-element
/// list used here.
#[test]
fn build_client_pool_one_per_source_ip() {
    let mut cfg = SelfTestConfig {
        target_url: "http://127.0.0.1:1".into(),
        timeout: Duration::from_millis(200),
        ..Default::default()
    };

    // No source IPs configured → exactly one default client.
    let default_pool = build_client_pool(&cfg).expect("default builds");
    assert_eq!(default_pool.len(), 1);

    // One source IP → one client. Loopback bind is portable.
    let loopback = "127.0.0.1".parse().unwrap();
    cfg.source_ips = vec![loopback];
    let bound_pool = build_client_pool(&cfg).expect("bind loopback");
    assert_eq!(bound_pool.len(), 1);
}
2234
2235 /// Round 18.5 — geo IPs round-robin across operations. Hits an
2236 /// unreachable target so we can inspect the case outcomes; the
2237 /// point is to confirm `op_headers` carried the geo IP through
2238 /// (CaseOutcome doesn't surface headers directly, so we just
2239 /// verify the run completes without panicking and the result
2240 /// shape is correct when source_ips is non-empty too).
2241 #[tokio::test]
2242 async fn run_self_test_with_geo_source_completes() {
2243 let cfg = SelfTestConfig {
2244 target_url: "http://127.0.0.1:1".into(),
2245 timeout: Duration::from_millis(200),
2246 geo_source_ips: vec![
2247 "203.0.113.1".parse().unwrap(),
2248 "203.0.113.2".parse().unwrap(),
2249 ],
2250 ..Default::default()
2251 };
2252 let ops = vec![
2253 op("GET", "/a", None, vec![], vec![], vec![]),
2254 op("GET", "/b", None, vec![], vec![], vec![]),
2255 op("GET", "/c", None, vec![], vec![], vec![]),
2256 ];
2257 let report = run_self_test(&ops, &cfg).await.expect("client builds");
2258 assert_eq!(report.operations.len(), 3);
2259 }
2260
2261 /// Round 24 (f) — Srikanth saw the geo header on positive probes
2262 /// only; the four negative-probe call sites were passing
2263 /// `op.header_params` directly instead of `op_headers`, so the
2264 /// geo IP got dropped. This test runs a self-test that includes
2265 /// negative probes (uri-too-long, missing-query, etc.) under
2266 /// `--conformance-self-test-capture`, then asserts that EVERY
2267 /// captured probe (positive AND negative) carries one of the
2268 /// configured forwarded-IP headers.
2269 #[tokio::test]
2270 async fn geo_headers_present_on_every_probe_with_capture() {
2271 let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2272 let cfg = SelfTestConfig {
2273 target_url: "http://127.0.0.1:1".into(),
2274 timeout: Duration::from_millis(50),
2275 geo_source_ips: vec!["203.0.113.5".parse().unwrap()],
2276 capture: Some(sink.clone()),
2277 ..Default::default()
2278 };
2279 // An operation rich enough to trip several negative-probe
2280 // branches: header param (→ missing-header), query param
2281 // (→ missing-query), and a sample body (→ schema mutations
2282 // wouldn't fire without a schema, but uri-too-long always
2283 // does).
2284 let ops = vec![op(
2285 "GET",
2286 "/items",
2287 Some("{}"),
2288 vec![("id", "1")],
2289 vec![("X-Trace", "x")],
2290 vec![],
2291 )];
2292 let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2293 let captures = sink.lock().unwrap();
2294 assert!(!captures.is_empty(), "self-test should record probes");
2295 // For every captured probe, at least one of the default geo
2296 // headers must be present and equal to the configured IP.
2297 let geo_headers: std::collections::HashSet<&str> =
2298 ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"].into_iter().collect();
2299 for c in captures.iter() {
2300 let has_geo = c
2301 .request_headers
2302 .iter()
2303 .any(|(k, v)| geo_headers.contains(k.as_str()) && v == "203.0.113.5");
2304 assert!(
2305 has_geo,
2306 "probe `{}` is missing the geo IP header; got headers: {:?}",
2307 c.label, c.request_headers
2308 );
2309 }
2310 }
2311
2312 /// Round 25 (k) — operations with a JSON request body now get four
2313 /// content-type-swap probes (xml / yaml / multipart / urlencoded).
2314 /// Verify they:
2315 /// 1. fire only when the operation declares a JSON body
2316 /// 2. carry the wrong Content-Type the probe is testing for
2317 /// 3. don't fire on body-less operations
2318 #[tokio::test]
2319 async fn content_type_swap_probes_fire_for_json_bodies() {
2320 let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2321 let cfg = SelfTestConfig {
2322 target_url: "http://127.0.0.1:1".into(),
2323 timeout: Duration::from_millis(50),
2324 capture: Some(sink.clone()),
2325 ..Default::default()
2326 };
2327 let ops = vec![
2328 op("POST", "/users", Some("{\"name\":\"a\"}"), vec![], vec![], vec![]),
2329 op("GET", "/ping", None, vec![], vec![], vec![]),
2330 ];
2331 let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2332 let captures = sink.lock().unwrap();
2333
2334 let swap_labels: Vec<&str> = captures
2335 .iter()
2336 .filter(|c| c.label.starts_with("request-body:content-type-mismatch:"))
2337 .map(|c| c.label.as_str())
2338 .collect();
2339 assert_eq!(
2340 swap_labels.len(),
2341 4,
2342 "expected 4 content-type-swap probes (one per variant), got: {swap_labels:?}"
2343 );
2344 let expected_labels = [
2345 "request-body:content-type-mismatch:xml",
2346 "request-body:content-type-mismatch:yaml",
2347 "request-body:content-type-mismatch:multipart",
2348 "request-body:content-type-mismatch:urlencoded",
2349 ];
2350 for want in expected_labels {
2351 assert!(swap_labels.contains(&want), "missing swap probe `{want}`");
2352 }
2353
2354 // Each swap probe must carry the wrong Content-Type it's
2355 // testing for — that's the whole point.
2356 for c in captures.iter() {
2357 let Some(suffix) = c.label.strip_prefix("request-body:content-type-mismatch:") else {
2358 continue;
2359 };
2360 let want_ct = match suffix {
2361 "xml" => "application/xml",
2362 "yaml" => "application/yaml",
2363 "multipart" => "multipart/form-data",
2364 "urlencoded" => "application/x-www-form-urlencoded",
2365 _ => continue,
2366 };
2367 let got_ct = c
2368 .request_headers
2369 .iter()
2370 .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2371 .map(|(_, v)| v.as_str())
2372 .unwrap_or("");
2373 assert_eq!(got_ct, want_ct, "swap probe `{}` sent wrong CT", c.label);
2374 }
2375
2376 // The body-less operation must NOT produce content-type-swap
2377 // probes (no body → no content type to lie about).
2378 let body_less_swaps = captures
2379 .iter()
2380 .filter(|c| {
2381 c.label.starts_with("request-body:content-type-mismatch:")
2382 && c.url.ends_with("/ping")
2383 })
2384 .count();
2385 assert_eq!(
2386 body_less_swaps, 0,
2387 "GET /ping has no request body; should not produce content-type-swap probes"
2388 );
2389 }
2390
2391 /// Round 27 (k variant b) — Srikanth's round-23 follow-up on (k):
2392 /// JSON envelope with embedded non-JSON field values. For each
2393 /// JSON-body operation, four extra probes fire that send valid
2394 /// JSON with an XML/YAML/multipart/urlencoded snippet stuffed
2395 /// into a string field. Content-Type stays `application/json`;
2396 /// expected is 2xx-3xx (the body parses); a 5xx flags a server
2397 /// that crashed on the embedded content.
2398 #[tokio::test]
2399 async fn embedded_content_probes_fire_with_honest_content_type() {
2400 let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2401 let cfg = SelfTestConfig {
2402 target_url: "http://127.0.0.1:1".into(),
2403 timeout: Duration::from_millis(50),
2404 capture: Some(sink.clone()),
2405 ..Default::default()
2406 };
2407 let ops = vec![op(
2408 "POST",
2409 "/users",
2410 Some("{\"name\":\"alice\",\"age\":30}"),
2411 vec![],
2412 vec![],
2413 vec![],
2414 )];
2415 let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2416 let captures = sink.lock().unwrap();
2417 let embedded: Vec<&CaseCapture> = captures
2418 .iter()
2419 .filter(|c| c.label.starts_with("request-body:embedded-content:"))
2420 .collect();
2421 assert_eq!(
2422 embedded.len(),
2423 4,
2424 "expected 4 embedded-content probes, got: {:?}",
2425 embedded.iter().map(|c| &c.label).collect::<Vec<_>>()
2426 );
2427 // Every embedded probe must carry the honest application/json
2428 // Content-Type (NOT lie like the variant-a content-type-swap
2429 // probes do) and a request body that still parses as JSON.
2430 for c in &embedded {
2431 let ct = c
2432 .request_headers
2433 .iter()
2434 .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2435 .map(|(_, v)| v.as_str())
2436 .unwrap_or("");
2437 assert!(
2438 ct.contains("application/json"),
2439 "embedded probe `{}` should keep Content-Type honest, got {ct}",
2440 c.label
2441 );
2442 let body = c.request_body.as_deref().unwrap_or("");
2443 assert!(
2444 serde_json::from_str::<serde_json::Value>(body).is_ok(),
2445 "embedded probe `{}` body should still be valid JSON, got: {body}",
2446 c.label
2447 );
2448 }
2449 }
2450
/// `embed_payload_in_first_string_field` replaces only the FIRST
/// string-valued leaf it meets (depth-first) and leaves the rest of
/// the document as it was: here `name` takes the payload, `age` stays
/// an integer, and the strings inside `tags` are untouched.
#[test]
fn embed_payload_replaces_first_string_only() {
    let sample = r#"{"name":"alice","age":30,"tags":["admin","user"]}"#;
    let mutated = embed_payload_in_first_string_field(sample, "<x/>")
        .expect("string field present so probe constructed");
    let document: serde_json::Value = serde_json::from_str(&mutated).unwrap();

    // (JSON pointer, value expected at that location)
    let expectations = [
        ("/name", serde_json::json!("<x/>")),
        ("/age", serde_json::json!(30)),
        ("/tags/0", serde_json::json!("admin")),
        ("/tags/1", serde_json::json!("user")),
    ];
    for (pointer, expected) in expectations {
        assert_eq!(document.pointer(pointer), Some(&expected));
    }
}
2468
/// Round 34 (#829) — Srikanth on 0.3.178: when the positive sample has
/// NO string field, the earlier `{"data": <snippet>}` fallback built an
/// envelope that did not match real-API schemas (e.g. vCenter's
/// `consolecli` PUT wants `{enabled: bool}`), so the server correctly
/// answered 400 and the bench misreported the 2xx-3xx expectation.
/// The helper now returns `None` so the caller can skip the probe.
#[test]
fn embed_payload_returns_none_when_no_string_field() {
    // (sample without any string leaf, payload that would be embedded)
    let cases = [
        (r#"{"a":1,"b":[2,3]}"#, "<x><y></y></x>"),
        // The exact vCenter-style case Srikanth hit.
        (r#"{"enabled":true}"#, "<x/>"),
    ];
    for (sample, payload) in cases {
        assert!(embed_payload_in_first_string_field(sample, payload).is_none());
    }
}
2484
/// A sample that isn't JSON at all yields no probe: the helper
/// returns `None` for it.
#[test]
fn embed_payload_returns_none_for_invalid_json_sample() {
    let not_json = "garbage";
    let probe = embed_payload_in_first_string_field(not_json, "a=1&b=2");
    assert!(probe.is_none());
}
2489
/// Round 35 (#859) — on 0.3.179 Srikanth saw variant-b probes report
/// every 4xx as a mismatch whenever the spec field carried a
/// `pattern` / `format` validator that correctly rejected the
/// embedded payload. Those probes exist to catch 5xx only (the server
/// crashed parsing the embedded content); a 4xx is the well-behaved
/// outcome. The tristate `ExpectedOutcome::NotServerError` therefore
/// lets a variant-b probe pass on 2xx-4xx and fail only on 5xx.
#[test]
fn expected_outcome_pass_rules() {
    // One predicate per variant; each call builds a fresh variant
    // value, just like writing `ExpectedOutcome::X.passes(..)` inline.
    let success = |status| ExpectedOutcome::Success.passes(status);
    let client_error = |status| ExpectedOutcome::ClientError.passes(status);
    let not_server_error = |status| ExpectedOutcome::NotServerError.passes(status);

    // Success (positive): 2xx-3xx pass, 4xx + 5xx fail.
    assert!(success(200));
    assert!(success(201));
    assert!(success(204));
    assert!(success(301));
    assert!(!success(400));
    assert!(!success(415));
    assert!(!success(500));
    assert!(!success(0));

    // ClientError (negative): only 4xx pass.
    assert!(!client_error(200));
    assert!(client_error(400));
    assert!(client_error(404));
    assert!(client_error(422));
    assert!(!client_error(500));

    // NotServerError (variant-b): 2xx-4xx pass, 5xx fails.
    assert!(not_server_error(200));
    assert!(not_server_error(204));
    assert!(
        not_server_error(400),
        "Srikanth's vCenter consolecli case: 400 from a pattern validator should NOT be a probe failure"
    );
    assert!(not_server_error(415));
    assert!(not_server_error(422));
    assert!(
        !not_server_error(500),
        "Server CRASH on embedded content is the only real failure"
    );
    assert!(!not_server_error(502));
    assert!(!not_server_error(503));
    // Status 0 (network error / probe never reached the server) does
    // not count as a pass either.
    assert!(!not_server_error(0));
}
2531
/// Round 35 (#859) — the HTML viewer's "show mismatches only" filter
/// and Srikanth's `jq` pipelines both key off the per-capture
/// `expected_status_range` string, so each of the three outcomes has
/// to render as its own distinct label.
#[test]
fn expected_outcome_string_labels() {
    let expected_labels = [
        (ExpectedOutcome::Success, "2xx-3xx"),
        (ExpectedOutcome::ClientError, "4xx"),
        (ExpectedOutcome::NotServerError, "2xx-4xx"),
    ];
    for (outcome, label) in expected_labels {
        assert_eq!(outcome.as_str(), label);
    }
}
2542
/// Round 26 — Srikanth's 0.3.169 capture for the vCenter
/// `infraprofile/configs` 202 response showed `at /: Type { kind: Single`:
/// the spec promised `type: string`, the server returned a JSON
/// object, and the mismatch was printed as a broken-syntax debug
/// string. This test replays that exact spec + body and checks the
/// message is human-readable.
#[test]
fn response_schema_error_message_is_readable() {
    let string_schema = serde_json::json!({"type": "string"});
    let object_body = r#"{"data":{},"id":"generated_id","status":"created"}"#;
    let err =
        validate_body_against_schema(object_body, &string_schema).expect("type-mismatch fires");

    // No Rust debug-syntax leftovers may leak into the message; it
    // should instead say which type was expected.
    for debug_leftover in ["Type { kind", "{ kind:"] {
        assert!(!err.contains(debug_leftover), "stale debug output: {err}");
    }
    assert!(err.contains("string"), "should name expected type: {err}");

    // Round 29 — on 0.3.172 Srikanth read `at /:` as pointing to the
    // URL path. The format now says "response body root" for the root
    // case (and "response body at /<pointer>" for nested fields).
    let labels_root = err.contains("response body root");
    assert!(
        labels_root,
        "should label root explicitly so reader knows it's not the URL: {err}"
    );

    // Round 28 — Srikanth wanted the expected schema embedded in the
    // message so it reads as 'expected schema {"type":"string"}'.
    let has_schema_label = err.contains("expected schema");
    let has_schema_json = err.contains("\"type\":\"string\"");
    assert!(
        has_schema_label && has_schema_json,
        "should include expected schema JSON: {err}"
    );
}
2575
/// Round 29 — a mismatch below the root must read
/// "response body at /name: ...". Guards against the root rewording
/// accidentally dropping the JSON pointer for nested fields.
#[test]
fn response_schema_error_uses_response_body_prefix_for_nested_paths() {
    let object_schema = serde_json::json!({
        "type": "object",
        "required": ["name"],
        "properties": {"name": {"type": "string"}}
    });
    // `name` is present but numeric, so the failure sits at `/name`.
    let err = validate_body_against_schema(r#"{"name": 123}"#, &object_schema)
        .expect("type-mismatch fires");

    let nested_label = "response body at /name";
    assert!(
        err.contains(nested_label),
        "nested path should read 'response body at /name': {err}"
    );
    let root_label = "response body root";
    assert!(!err.contains(root_label), "wrong label for nested: {err}");

    // Round 30 — the "expected schema" suffix should be just the
    // sub-schema at /name rather than the whole object schema, so the
    // reader doesn't have to scan a 300-char object for the failing
    // constraint.
    let focused_suffix = r#"expected schema {"type":"string"}"#;
    assert!(
        err.contains(focused_suffix),
        "should show only the /name sub-schema, not the full object: {err}"
    );
}
2603
/// Round 30 — Srikanth asked how a mismatch two levels down reads.
/// The schema wants `name.type` to be a string, the body supplies a
/// number, and the JSON pointer in the message is `/name/type`.
#[test]
fn response_schema_error_uses_response_body_prefix_for_deep_nested_paths() {
    let nested_schema = serde_json::json!({
        "type": "object",
        "properties": {
            "name": {
                "type": "object",
                "properties": {"type": {"type": "string"}}
            }
        }
    });
    let err = validate_body_against_schema(r#"{"name": {"type": 123}}"#, &nested_schema)
        .expect("type-mismatch fires");

    let deep_label = "response body at /name/type";
    assert!(
        err.contains(deep_label),
        "deep nested path should read 'response body at /name/type': {err}"
    );

    // Round 30 — for deep paths the printed sub-schema is the leaf
    // {"type":"string"}, without the object schemas wrapped around it.
    let leaf_suffix = r#"expected schema {"type":"string"}"#;
    assert!(
        err.contains(leaf_suffix),
        "should show only the /name/type leaf sub-schema: {err}"
    );
}
2631
/// Round 30 — when the instance pointer can't be followed through the
/// schema's `properties` chain (e.g. an additionalProperties hit),
/// `sub_schema_at_pointer` returns `None`, and the message falls back
/// to the full schema. This test pins the `None` signal that drives
/// that fallback, plus the root-pointer spellings.
#[test]
fn sub_schema_at_pointer_falls_back_for_unresolvable_paths() {
    let schema = serde_json::json!({"type":"object","additionalProperties":true});

    // `/unknown` is not declared under `properties`, so the walker
    // reports `None` rather than handing back a sub-schema.
    let unresolved = sub_schema_at_pointer(&schema, "/unknown");
    assert_eq!(
        unresolved, None,
        "unresolvable path should return None to trigger fallback"
    );

    // Both spellings of the root pointer give back the whole schema.
    let slash_root = sub_schema_at_pointer(&schema, "/");
    assert_eq!(slash_root, Some(schema.clone()));
    let empty_root = sub_schema_at_pointer(&schema, "");
    assert_eq!(empty_root, Some(schema));
}
2649
/// A body that omits a `required` property must yield a message that
/// says "required field missing" and mentions the absent field.
#[test]
fn response_schema_error_required_field_is_readable() {
    let schema = serde_json::json!({
        "type": "object",
        "required": ["id"],
        "properties": {"id": {"type": "integer"}}
    });
    // `id` is absent; only an unrelated key is supplied.
    let err = validate_body_against_schema(r#"{"other": 1}"#, &schema)
        .expect("required-missing fires");

    for fragment in ["required field missing", "id"] {
        assert!(err.contains(fragment), "{err}");
    }
}
2662
/// Round 31 — Srikanth's vCenter case on 0.3.174. The
/// `Appliance.Recovery.Backup.SystemName.Archive.Info` schema has a
/// multi-paragraph description and ~6 required fields; the response
/// was missing `comment`. Before the fix the printed schema was the
/// WHOLE parent object schema (parent description bleeding in, every
/// sibling property schema dumped) cut at 300 chars. After the fix it
/// is the missing field's own schema. Checks that (a) the parent
/// description is gone and (b) no sibling property name shows up.
#[test]
fn response_schema_error_required_focuses_on_missing_field_only() {
    let archive_info_schema = serde_json::json!({
        "description": "The Appliance.Recovery.Backup.SystemName.Archive.Info schema represents backup archive information.\n\nThis schema was added in vSphere API 6.7.",
        "type": "object",
        "required": ["comment", "location", "parts", "system_name", "timestamp", "version"],
        "properties": {
            "comment": {
                "type": "string",
                "description": "Custom comment added by the user for this backup."
            },
            "location": {"type": "string", "description": "Backup location URL."},
            "parts": {"type": "array", "items": {"type": "string"}},
            "system_name": {"type": "string"},
            "timestamp": {"type": "string", "format": "date-time"},
            "version": {"type": "string"}
        }
    });
    // Every required field except `comment` is supplied.
    let body_without_comment =
        r#"{"location":"x","parts":[],"system_name":"y","timestamp":"z","version":"v"}"#;
    let err = validate_body_against_schema(body_without_comment, &archive_info_schema)
        .expect("required-missing fires");

    assert!(err.contains("required field missing: \"comment\""), "{err}");

    // The parent's description must not show up; only the `comment`
    // field's own description (if any) may.
    let parent_description_marker = "Appliance.Recovery.Backup";
    assert!(
        !err.contains(parent_description_marker),
        "parent description should not bleed into focused schema: {err}"
    );

    // None of the sibling property names may appear (as quoted JSON
    // keys) in the focused schema suffix.
    let siblings = ["location", "parts", "system_name", "timestamp", "version"];
    for sibling in siblings {
        let quoted_key = format!("\"{sibling}\"");
        assert!(
            !err.contains(&quoted_key),
            "sibling field {sibling} should not appear in focused schema: {err}"
        );
    }
}
2708
/// A body that satisfies the schema produces no error message at all.
#[test]
fn response_schema_error_none_on_match() {
    let string_schema = serde_json::json!({"type": "string"});
    // A JSON string literal, i.e. exactly what the schema asks for.
    let outcome = validate_body_against_schema("\"hello\"", &string_schema);
    assert_eq!(outcome, None);
}
2714
/// Round 34 (#827) — on 0.3.178 Srikanth hit the vCenter `consolecli`
/// PUT, whose `enabled: boolean` property carries a multi-paragraph
/// description. The schema printout was cut off mid-description, so
/// `type: boolean` fell past the 300-char cap and was never shown.
/// Dropping `description` (and friends) before serializing has to
/// keep the type information visible.
#[test]
fn response_schema_error_strips_description_so_type_survives_truncation() {
    // Without stripping, this description would push `type` beyond
    // the 300-char truncation cap. The wording is intentionally close
    // to the vCenter-spec text Srikanth quoted.
    let long_description = "In the result of the #get and #list operations this property indicates whether proxying is enabled for a particular protocol. In the input to the test and set operations this property specifies whether proxying should be enabled for a particular protocol. This property was added in vSphere API 6.7. Defaults to enabled if both this field and the value field are unset.";
    let schema = serde_json::json!({
        "type": "object",
        "required": ["enabled"],
        "properties": {
            "enabled": {
                "type": "boolean",
                "description": long_description,
                "example": true,
            }
        }
    });
    // An empty object, so the required `enabled` property is missing.
    let empty_object = "{}";
    let err = validate_body_against_schema(empty_object, &schema)
        .expect("required-missing fires");

    assert!(err.contains("required field missing: \"enabled\""), "{err}");

    let type_keyword = r#""type":"boolean""#;
    assert!(
        err.contains(type_keyword),
        "the `type: boolean` keyword must survive truncation: {err}"
    );

    // The prose must be gone so the suffix stays type-focused.
    let description_fragment = "proxying is enabled";
    assert!(
        !err.contains(description_fragment),
        "description should be stripped from the printed schema: {err}"
    );
    let example_key = "\"example\"";
    assert!(
        !err.contains(example_key),
        "`example` field should be stripped from the printed schema: {err}"
    );
}
2757
/// Round 34 (#827) — `strip_schema_noise` may only remove the prose
/// noise; every constraint keyword has to come through intact.
#[test]
fn strip_schema_noise_preserves_constraint_keywords() {
    let noisy_schema = serde_json::json!({
        "type": "object",
        "required": ["a", "b"],
        "description": "should be stripped",
        "title": "should be stripped",
        "example": {"a": 1, "b": 2},
        "properties": {
            "a": {"type": "string", "format": "uri", "minLength": 1, "description": "drop"},
            "b": {"type": "integer", "minimum": 0, "maximum": 100, "summary": "drop"},
        },
    });
    let serialized = serde_json::to_string(&strip_schema_noise(&noisy_schema)).unwrap();

    // Constraint keywords survive (matched as quoted JSON keys).
    let constraint_keys = [
        "\"type\"",
        "\"required\"",
        "\"properties\"",
        "\"format\"",
        "\"minLength\"",
        "\"minimum\"",
        "\"maximum\"",
    ];
    for keyword in constraint_keys {
        assert!(serialized.contains(keyword), "should keep {keyword}: {serialized}");
    }

    // Noise fields are gone at every nesting level of the output.
    let noise_keys = ["description", "title", "example", "summary"];
    for noise_key in noise_keys {
        let quoted_key = format!("\"{noise_key}\"");
        assert!(
            !serialized.contains(&quoted_key),
            "should strip {noise_key}: {serialized}"
        );
    }
}
2792
/// Smoke test: a minimal `SelfTestReport` converts to a JSON value
/// through `serde_json::to_value`, and both a top-level counter and a
/// nested per-operation field are readable from the result.
#[test]
fn json_serialises_report() {
    // Build the fixture bottom-up: one passing positive case, wrapped
    // in one operation, wrapped in the report.
    let positive_case = CaseOutcome {
        label: "positive".into(),
        expected_4xx: false,
        actual_status: 200,
        passed: true,
    };
    let operation = OperationResult {
        method: "GET".into(),
        path: "/x".into(),
        positive: Some(positive_case),
        negatives: Vec::new(),
    };
    let report = SelfTestReport {
        positive_pass: 1,
        positive_fail: 0,
        negative_caught: BTreeMap::new(),
        negative_missed: BTreeMap::new(),
        operations: vec![operation],
    };

    let as_json = serde_json::to_value(&report).expect("serialises");
    assert_eq!(as_json["positive_pass"], serde_json::json!(1));
    assert_eq!(
        as_json["operations"][0]["positive"]["actual_status"],
        serde_json::json!(200)
    );
}
2816}