mockforge_bench/conformance/self_test.rs
1//! Positive + per-category negative request driver against a live server.
2//!
3//! Issue #79 round 13 (4) — Srikanth's (e) ask: a way to test both
4//! positive and negative compliance scenarios separately, where the
5//! positive cases should pass and the negative cases should be
6//! rejected.
7//!
8//! This module sits *alongside* the existing conformance executor
9//! (which drives k6 / native checks on a single positive call per
10//! operation). The self-test driver synthesises per-category
11//! deliberately-bad requests and asserts that the server actually
12//! rejects them with a 4xx — useful when verifying that
13//! `validate_request_with_all` is wired correctly for the user's spec
14//! (the exact gap that round-13 (3) fixed).
15//!
16//! Scope of the initial MVP: covers the highest-signal negatives —
17//! empty body when one is required, missing required query/header
18//! params, and wrong-type path params. Doesn't try to mutate every
19//! field of a JSON-Schema-validated body; that's a follow-up.
20
21use super::spec_driven::{AnnotatedOperation, ApiKeyLocation, SecuritySchemeInfo};
22use reqwest::{Client, Method};
23use std::collections::BTreeMap;
24use std::net::IpAddr;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
/// Round 23 (c-iii) — per-direction cap, in bytes, on each request or
/// response payload captured into
/// `conformance-self-test-requests.jsonl`.
///
/// 16 KiB per direction keeps a 1000-case run under ~32 MB even if
/// every payload fills the cap, while still preserving enough of a
/// typical JSON body (or a stack-trace error response) to debug from.
const CAPTURE_BODY_CAP_BYTES: usize = 16 * 1024;
35
/// Round 17.2 — maximum number of schema-driven negative probes sent
/// per operation.
///
/// A spec with 100 properties per body could produce hundreds of
/// mutations for a single operation; combined with thousands of
/// operations that's a runaway test matrix. 12 covers the
/// highest-signal mutations (type mismatch + required-removed + a few
/// constraint breaks) without exploding wall time on large specs.
const SCHEMA_MUTATION_CAP: usize = 12;
43
/// Round 25 (k) — content-type swap probes, as
/// `(content type to claim, probe label)` pairs.
///
/// For operations declaring a JSON request body, each entry produces
/// one probe that lies about Content-Type while keeping the JSON
/// payload. A spec-compliant server should respond 415 (or 400).
/// Order matches the order Srikanth listed in his round-23 reply:
/// XML, YAML, multipart, and the URL-encoded variant he added in
/// round 24.
const CONTENT_TYPE_SWAP_VARIANTS: &[(&str, &str)] = &[
    ("application/xml", "request-body:content-type-mismatch:xml"),
    ("application/yaml", "request-body:content-type-mismatch:yaml"),
    ("multipart/form-data", "request-body:content-type-mismatch:multipart"),
    (
        "application/x-www-form-urlencoded",
        "request-body:content-type-mismatch:urlencoded",
    ),
];
59
/// Round 27 (k variant b) — embedded content payloads, as
/// `(probe label, snippet)` pairs.
///
/// Content-Type stays `application/json` and the envelope IS valid
/// JSON; the snippet is stuffed into a string field's value. The test
/// surfaces servers that try to parse string field contents (e.g.
/// XML-EE expanders, YAML loaders, urlencoded parsers) and crash on
/// the payload — a 5xx here is the finding.
const EMBEDDED_CONTENT_VARIANTS: &[(&str, &str)] = &[
    ("request-body:embedded-content:xml", "<root><cmd>execute()</cmd></root>"),
    ("request-body:embedded-content:yaml", "key: value\n- item1\n- item2"),
    (
        "request-body:embedded-content:multipart",
        "--boundary\r\nContent-Disposition: form-data; name=\"x\"\r\n\r\nval\r\n--boundary--",
    ),
    ("request-body:embedded-content:urlencoded", "a=1&b=2&c=hello%20world"),
];
75
/// Configuration for a self-test run.
#[derive(Debug, Clone)]
pub struct SelfTestConfig {
    /// Base URL of the server under test; every probe URL is built
    /// from it (together with `base_path` and the operation's path).
    pub target_url: String,
    /// When true, the HTTP clients are built to accept invalid TLS
    /// certificates.
    pub skip_tls_verify: bool,
    /// Timeout configured on every HTTP client in the pool.
    pub timeout: Duration,
    /// Optional extra headers to attach to every request (e.g. auth).
    pub extra_headers: Vec<(String, String)>,
    /// Delay used to avoid hammering the server. `run_self_test`
    /// sleeps for this long after each operation's probes have been
    /// sent; a zero duration disables the sleep.
    pub delay_between_requests: Duration,
    /// Round 18.1 — base path to prepend to every spec path. When the
    /// spec declares `/users` and the deployed API is served under
    /// `/api`, `--base-path /api` should make the self-test hit
    /// `https://target/api/users` instead of `https://target/users`.
    /// Pre-fix this was ignored entirely and every operation 404'd
    /// (Srikanth's vCenter run on 0.3.152: 1275 positives, 1275 4xx).
    pub base_path: Option<String>,
    /// Round 18.5 — local source IPs to bind outgoing requests to.
    /// Each IP must already be assigned to an interface on the host.
    /// Operations round-robin through the resulting client pool.
    pub source_ips: Vec<IpAddr>,
    /// Round 18.5 — fake source IPs to advertise via forwarded-IP
    /// headers (used to exercise GEODB lookup at the destination).
    /// Rotated per operation.
    pub geo_source_ips: Vec<IpAddr>,
    /// Which forwarded-IP header(s) to populate when `geo_source_ips`
    /// is non-empty. Empty → no-op; the `Default` impl fills this with
    /// `default_geo_source_headers()`, the standard three-header set.
    pub geo_source_headers: Vec<String>,
    /// Round 23 (c-iii) — when `Some`, every probe captures method, URL,
    /// request headers/body and response status/headers/body into this
    /// sink. Caller drains it after `run_self_test` and writes
    /// `conformance-self-test-requests.jsonl`. None → no capture (zero
    /// extra allocations on the hot path).
    pub capture: Option<Arc<Mutex<Vec<CaseCapture>>>>,
    /// Round 25 — when true, validate every probe's response body
    /// against the spec's response schema for the actual status
    /// returned (closes round 21.3 / Srikanth's a2 / a3 ask). The
    /// validation result lands in `CaseCapture::response_schema_error`
    /// (None → matched, or no schema for that status), so it only has
    /// an effect when `capture` is also set. Default false:
    /// JSON-Schema validation of large response bodies adds wall-clock
    /// time and the user has to opt in.
    pub validate_response_schemas: bool,
}
120
/// Round 23 (c-iii) — one captured request/response pair, one per
/// probe (positive or negative). Serialised as a JSON line in
/// `conformance-self-test-requests.jsonl`. Headers are kept as
/// `BTreeMap` for stable ordering. Bodies are truncated to
/// `CAPTURE_BODY_CAP_BYTES`; the matching `*_truncated` flag records
/// whether anything was dropped.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseCapture {
    /// Label of the probe that produced this capture.
    pub label: String,
    /// HTTP method of the request.
    pub method: String,
    /// URL the request was sent to.
    pub url: String,
    /// Request headers, keyed by header name.
    pub request_headers: BTreeMap<String, String>,
    /// Request body (possibly truncated), if one was sent.
    pub request_body: Option<String>,
    /// True when `request_body` was cut at the capture cap.
    pub request_body_truncated: bool,
    /// Status code recorded for the response.
    pub response_status: u16,
    /// Response headers, keyed by header name.
    pub response_headers: BTreeMap<String, String>,
    /// Response body (possibly truncated), if one was captured.
    pub response_body: Option<String>,
    /// True when `response_body` was cut at the capture cap.
    pub response_body_truncated: bool,
    /// Error message recorded for the probe, if any.
    // NOTE(review): presumably a transport-level failure description;
    // the code that fills it is not in this part of the file — confirm.
    pub error: Option<String>,
    /// Round 25 — when `validate_response_schemas` is on and the spec
    /// declares a schema for `response_status`, this carries the
    /// validation message (or None when the body matched, or no schema
    /// was declared for that status). Omitted from the JSONL when
    /// `None`; rendered in the HTML viewer.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_schema_error: Option<String>,
    /// Round 28 — Srikanth's ask to put the expected response status
    /// in both the JSONL and the report. Human-readable expected
    /// status range: `"2xx-3xx"` for positive probes, `"4xx"` for
    /// negatives. Lets users `jq` for misses
    /// (`.response_status as $s | .expected_status_range == "4xx"
    /// and ($s < 400 or $s >= 500)`) and powers the HTML viewer's
    /// "show mismatches only" filter.
    #[serde(default)]
    pub expected_status_range: String,
}
157
158impl Default for SelfTestConfig {
159 fn default() -> Self {
160 Self {
161 target_url: "http://localhost:3000".into(),
162 skip_tls_verify: false,
163 timeout: Duration::from_secs(15),
164 extra_headers: Vec::new(),
165 delay_between_requests: Duration::from_millis(0),
166 base_path: None,
167 source_ips: Vec::new(),
168 geo_source_ips: Vec::new(),
169 geo_source_headers: default_geo_source_headers(),
170 capture: None,
171 validate_response_schemas: false,
172 }
173 }
174}
175
176/// Truncate `body` to `CAPTURE_BODY_CAP_BYTES` on a UTF-8 boundary,
177/// returning the trimmed string and whether truncation occurred. Used
178/// for both request and response bodies in the capture sink.
179fn truncate_body_for_capture(body: &str) -> (String, bool) {
180 if body.len() <= CAPTURE_BODY_CAP_BYTES {
181 return (body.to_string(), false);
182 }
183 let mut end = CAPTURE_BODY_CAP_BYTES;
184 while end > 0 && !body.is_char_boundary(end) {
185 end -= 1;
186 }
187 (body[..end].to_string(), true)
188}
189
/// Default forwarded-IP header names: the de-facto standard
/// `X-Forwarded-For`, Akamai/CloudFront's `True-Client-IP`, and
/// Cloudflare's `CF-Connecting-IP`.
///
/// The list order is only the order in which the headers are appended
/// to a request; every header carries the same IP, so it does not
/// encode which one a GEODB front-end prefers. Override via
/// `--geo-source-header` to test a specific stack.
pub fn default_geo_source_headers() -> Vec<String> {
    const NAMES: [&str; 3] = ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"];
    NAMES.iter().map(|name| (*name).to_owned()).collect()
}
203
/// Outcome of a single test case (positive or negative).
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseOutcome {
    /// Probe label, e.g. `"positive"` or `"request-body:empty"`. For
    /// negatives, `run_self_test` uses the text before the first `:`
    /// as the report category.
    pub label: String,
    /// Whether this probe expected the server to answer with a 4xx.
    pub expected_4xx: bool,
    /// Status code recorded for the response.
    pub actual_status: u16,
    /// True when the response status matches expectation
    /// (positive → 2xx-3xx, negative → 4xx).
    pub passed: bool,
}
214
/// All cases run against one annotated operation.
#[derive(Debug, Clone, serde::Serialize)]
pub struct OperationResult {
    /// HTTP method of the operation, as declared on the annotated
    /// operation.
    pub method: String,
    /// Path of the operation, as declared on the annotated operation.
    pub path: String,
    /// Outcome of the positive probe, if one was run.
    pub positive: Option<CaseOutcome>,
    /// Outcomes of the other probes, in the order they were sent.
    pub negatives: Vec<CaseOutcome>,
}
223
/// Summary report rolled up across all operations.
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct SelfTestReport {
    /// Number of operations whose positive probe passed.
    pub positive_pass: usize,
    /// Number of operations whose positive probe failed.
    pub positive_fail: usize,
    /// Per category: count of negative cases the server correctly
    /// rejected with a 4xx (we caught the spec violation).
    pub negative_caught: BTreeMap<String, usize>,
    /// Per category: count of negative cases that should have been
    /// rejected but came back with a non-4xx (validator gap).
    pub negative_missed: BTreeMap<String, usize>,
    /// Per-operation detail, in the order the operations were run.
    pub operations: Vec<OperationResult>,
}
237
238impl SelfTestReport {
239 /// All-pass means every positive case got 2xx-3xx and every
240 /// negative case got 4xx.
241 pub fn all_passed(&self) -> bool {
242 self.positive_fail == 0 && self.negative_missed.values().sum::<usize>() == 0
243 }
244
245 /// Round 18.1 — detect the "self-test target is misconfigured"
246 /// case where every positive failed with the *same* status code.
247 /// The classic example: `--base-path /api` was forgotten so every
248 /// request hits a path the server doesn't know and returns 404.
249 /// Pre-warning, the user saw all-green negative buckets (because
250 /// "missing route" 404s look like "validator rejected") and no
251 /// indication that the run was meaningless. Returns Some(status)
252 /// when ≥10 positives all failed with the same status, else None.
253 pub fn detect_target_misconfiguration(&self) -> Option<u16> {
254 if self.positive_pass > 0 || self.positive_fail < 10 {
255 return None;
256 }
257 let mut seen: Option<u16> = None;
258 for op in &self.operations {
259 let Some(p) = &op.positive else {
260 continue;
261 };
262 if p.passed {
263 return None;
264 }
265 match seen {
266 None => seen = Some(p.actual_status),
267 Some(s) if s != p.actual_status => return None,
268 _ => {}
269 }
270 }
271 seen
272 }
273
274 /// Human-readable summary string. One line for positives, one per
275 /// category for negatives. Designed to slot into existing
276 /// `TerminalReporter` output.
277 pub fn render_summary(&self) -> String {
278 let mut out = String::new();
279 out.push_str(&format!(
280 "Positives: {} pass / {} fail\n",
281 self.positive_pass, self.positive_fail
282 ));
283 let mut keys: Vec<&String> =
284 self.negative_caught.keys().chain(self.negative_missed.keys()).collect();
285 keys.sort();
286 keys.dedup();
287 for cat in keys {
288 let caught = self.negative_caught.get(cat).copied().unwrap_or(0);
289 let missed = self.negative_missed.get(cat).copied().unwrap_or(0);
290 let mark = if missed == 0 { "✓" } else { "⚠" };
291 out.push_str(&format!(
292 "Negatives [{}]: {} caught / {} missed {}\n",
293 cat, caught, missed, mark
294 ));
295 }
296 out
297 }
298}
299
300/// Execute the self-test plan against `config.target_url` for every
301/// `AnnotatedOperation`. Returns the aggregated report; callers
302/// decide how to display it (e.g. via `render_summary` or by writing
303/// the JSON serialisation to disk).
304pub async fn run_self_test(
305 operations: &[AnnotatedOperation],
306 config: &SelfTestConfig,
307) -> Result<SelfTestReport, reqwest::Error> {
308 // Round 18.5 — build a client pool when `source_ips` is set,
309 // one reqwest::Client per IP, each bound to its local address.
310 // Operations round-robin through the pool. Empty pool → single
311 // default client (the pre-18.5 behaviour).
312 let clients = build_client_pool(config)?;
313 let client_cursor = AtomicUsize::new(0);
314 let geo_cursor = AtomicUsize::new(0);
315
316 let mut report = SelfTestReport::default();
317 for op in operations {
318 let client_idx = client_cursor.fetch_add(1, Ordering::Relaxed) % clients.len();
319 let client = &clients[client_idx];
320 let geo_ip = if config.geo_source_ips.is_empty() {
321 None
322 } else {
323 let idx = geo_cursor.fetch_add(1, Ordering::Relaxed) % config.geo_source_ips.len();
324 Some(config.geo_source_ips[idx])
325 };
326 let result = test_operation(client, config, op, geo_ip).await;
327 if let Some(p) = &result.positive {
328 if p.passed {
329 report.positive_pass += 1;
330 } else {
331 report.positive_fail += 1;
332 }
333 }
334 for neg in &result.negatives {
335 let cat = neg.label.split(':').next().unwrap_or("other").to_string();
336 if neg.passed {
337 *report.negative_caught.entry(cat).or_insert(0) += 1;
338 } else {
339 *report.negative_missed.entry(cat).or_insert(0) += 1;
340 }
341 }
342 report.operations.push(result);
343 if !config.delay_between_requests.is_zero() {
344 tokio::time::sleep(config.delay_between_requests).await;
345 }
346 }
347 Ok(report)
348}
349
/// Round 18.5 — combine an operation's declared headers with the
/// GEODB forwarded-IP headers.
///
/// The result always starts with a copy of `base`. When `geo_ip` is
/// `Some`, each name in `geo_headers` is appended with the IP as its
/// value — unless a header with that name (compared ASCII
/// case-insensitively) is already present, in which case the existing
/// value is kept so the spec's own declaration wins. With `geo_ip ==
/// None` or an empty `geo_headers` the copy of `base` is returned
/// as-is.
fn effective_op_headers(
    base: &[(String, String)],
    geo_ip: Option<IpAddr>,
    geo_headers: &[String],
) -> Vec<(String, String)> {
    let mut merged: Vec<(String, String)> = base.iter().cloned().collect();
    if let Some(addr) = geo_ip {
        let rendered = addr.to_string();
        for name in geo_headers {
            // Checked against the growing list, so a name repeated in
            // `geo_headers` is also added only once.
            let declared = merged
                .iter()
                .any(|(existing, _)| existing.eq_ignore_ascii_case(name));
            if !declared {
                merged.push((name.to_owned(), rendered.clone()));
            }
        }
    }
    merged
}
377
378/// Round 18.5 — build a pool of reqwest clients, one per declared
379/// source IP. Empty `source_ips` → a single default client.
380///
381/// The OS must already have each `source_ip` assigned to an
382/// interface; reqwest's `.local_address()` issues a `bind()` syscall
383/// at connect time, so an IP the kernel doesn't recognise surfaces
384/// as `EADDRNOTAVAIL` at request time, not at builder time.
385fn build_client_pool(config: &SelfTestConfig) -> Result<Vec<Client>, reqwest::Error> {
386 let make = |bind: Option<IpAddr>| -> Result<Client, reqwest::Error> {
387 let mut builder = Client::builder().timeout(config.timeout);
388 if config.skip_tls_verify {
389 builder = builder.danger_accept_invalid_certs(true);
390 }
391 if let Some(addr) = bind {
392 builder = builder.local_address(addr);
393 }
394 builder.build()
395 };
396 if config.source_ips.is_empty() {
397 Ok(vec![make(None)?])
398 } else {
399 config.source_ips.iter().map(|ip| make(Some(*ip))).collect()
400 }
401}
402
403async fn test_operation(
404 client: &Client,
405 config: &SelfTestConfig,
406 op: &AnnotatedOperation,
407 geo_ip: Option<IpAddr>,
408) -> OperationResult {
409 // Round 25 — track the sink length BEFORE we run any probes for
410 // this operation, so that after the probes finish we can mutate
411 // exactly the entries that belong to this op (the capture sink is
412 // shared but `run_self_test` iterates operations sequentially).
413 // Used by the response-schema validation pass below.
414 let sink_start = config.capture.as_ref().and_then(|s| s.lock().ok().map(|g| g.len()));
415
416 let url = build_url_with_base(
417 &config.target_url,
418 config.base_path.as_deref(),
419 &op.path,
420 &op.path_params,
421 );
422 let method = Method::from_bytes(op.method.to_uppercase().as_bytes()).unwrap_or(Method::GET);
423
424 // Round 18.5 — pre-compute the operation's effective headers
425 // with the geo source IP baked in. Doing it once here keeps the
426 // per-case `send_case` calls below unchanged. When `geo_ip` is
427 // None the result equals `op.header_params`.
428 let op_headers = effective_op_headers(&op.header_params, geo_ip, &config.geo_source_headers);
429
430 // ── Positive case ────────────────────────────────────────────
431 let positive = send_case(
432 client,
433 config,
434 method.clone(),
435 &url,
436 "positive",
437 false,
438 op.sample_body.as_deref(),
439 op.query_params.clone(),
440 op_headers.clone(),
441 )
442 .await;
443
444 // ── Negative cases ───────────────────────────────────────────
445 let mut negatives = Vec::new();
446
447 // (a) empty body when one is required.
448 //
449 // Round 16 — drop the `sample_body.is_some()` precondition. Operations
450 // whose body annotator couldn't synthesize a sample previously got
451 // zero negatives (so the self-test reported "all passing" even on
452 // POST /resource with a required body). The spec saying the operation
453 // *has* a request body is enough — an empty object is a valid
454 // negative regardless of whether we have a positive sample.
455 if op.request_body_content_type.is_some() {
456 negatives.push(
457 send_case(
458 client,
459 config,
460 method.clone(),
461 &url,
462 "request-body:empty",
463 true,
464 Some("{}"),
465 op.query_params.clone(),
466 op_headers.clone(),
467 )
468 .await,
469 );
470
471 // (b) wrong-shaped body (array instead of object) — exercises
472 // top-level type validation independently of which fields are
473 // required.
474 negatives.push(
475 send_case(
476 client,
477 config,
478 method.clone(),
479 &url,
480 "request-body:wrong-type",
481 true,
482 Some("[]"),
483 op.query_params.clone(),
484 op_headers.clone(),
485 )
486 .await,
487 );
488
489 // Round 25 (k) — content-type swap probes.
490 //
491 // For operations declaring `application/json` request bodies, send
492 // the SAME json payload (or a synthesised one) under four other
493 // content types: `application/xml`, `application/yaml`,
494 // `multipart/form-data`, `application/x-www-form-urlencoded`.
495 // The spec says the endpoint accepts only JSON, so a strict server
496 // should respond 415 Unsupported Media Type (or 400 if it tries
497 // to parse and fails). A 2xx means the server is accepting
498 // payloads outside its declared content negotiation, which is the
499 // failure mode behind a lot of "we crashed on a malformed XML
500 // upload" incidents.
501 //
502 // Variant (a) of Srikanth's round-23 g ask: lie about the
503 // Content-Type header. The body shape is honest JSON; only the
504 // header is swapped. Variant (b) (JSON envelope with embedded
505 // non-JSON field values) is deferred to round 26 because it
506 // requires a schema-aware field walker.
507 if op
508 .request_body_content_type
509 .as_deref()
510 .map(|ct| ct.contains("json"))
511 .unwrap_or(false)
512 {
513 let payload = op.sample_body.as_deref().unwrap_or("{}");
514 for (ct, label) in CONTENT_TYPE_SWAP_VARIANTS {
515 negatives.push(
516 send_case_with_extra(
517 client,
518 config,
519 method.clone(),
520 &url,
521 label,
522 true,
523 Some(payload),
524 op.query_params.clone(),
525 // Strip any Content-Type already on the operation
526 // headers (the spec's positive value) so the
527 // probe's value is the only one the server sees.
528 op_headers
529 .iter()
530 .filter(|(k, _)| !k.eq_ignore_ascii_case("content-type"))
531 .cloned()
532 .collect(),
533 // The wrong Content-Type rides on `extra_headers`
534 // so it lands AFTER `send_case_with_extra`'s
535 // unconditional `application/json` insertion in
536 // request-body mode. Actually `send_case_with_extra`
537 // only sets Content-Type when a body is present
538 // AND there's no manual override; passing the
539 // override here wins because reqwest preserves
540 // the last-set header value.
541 vec![("Content-Type".to_string(), (*ct).to_string())],
542 )
543 .await,
544 );
545 }
546
547 // Round 27 (k variant b) — embedded non-JSON content
548 // inside a valid JSON envelope. Content-Type stays
549 // application/json (honest) and the body parses as JSON;
550 // only the string-valued payload changes. We expect 2xx-3xx
551 // because the envelope is spec-shape, so the probe surfaces
552 // servers that crash (5xx) trying to parse the embedded
553 // snippet as XML/YAML/etc. A 4xx is also a finding because
554 // it usually means the server's pattern/format validator
555 // tripped on the payload contents, but the user can decide
556 // from the JSONL whether that's a bug or correct narrow-
557 // string-field behaviour.
558 for (label, snippet) in EMBEDDED_CONTENT_VARIANTS {
559 let payload = op.sample_body.as_deref().unwrap_or("{}");
560 let body = embed_payload_in_first_string_field(payload, snippet);
561 negatives.push(
562 send_case(
563 client,
564 config,
565 method.clone(),
566 &url,
567 label,
568 // expected_4xx=false: any non-2xx is a probe
569 // failure. 5xx in particular is "server panicked
570 // on the embedded content".
571 false,
572 Some(&body),
573 op.query_params.clone(),
574 op_headers.clone(),
575 )
576 .await,
577 );
578 }
579 }
580
581 // Round 17.2 — schema-aware negatives.
582 //
583 // When both a positive sample AND the resolved body schema are
584 // available, mutate the sample per-field (type mismatch,
585 // min/max bounds, pattern, enum out-of-range, required-field
586 // removal) and assert each is rejected with 4xx. Capped at
587 // SCHEMA_MUTATION_CAP per operation so a 100-property body
588 // doesn't explode the test matrix.
589 if let (Some(sample_str), Some(schema)) =
590 (op.sample_body.as_deref(), op.request_body_schema.as_ref())
591 {
592 if let Ok(sample) = serde_json::from_str::<serde_json::Value>(sample_str) {
593 let mutations = super::schema_mutator::mutate_body(&sample, schema);
594 for m in mutations.into_iter().take(SCHEMA_MUTATION_CAP) {
595 let body_str = serde_json::to_string(&m.body).unwrap_or_default();
596 negatives.push(
597 send_case(
598 client,
599 config,
600 method.clone(),
601 &url,
602 &m.label,
603 true,
604 Some(&body_str),
605 op.query_params.clone(),
606 // Round 24 (f) — was `op.header_params`, which
607 // skipped the geo-IP header. Use `op_headers`
608 // so the geo IP rides with the negative probe
609 // too (positive vs negative coverage must be
610 // symmetric, otherwise a GEODB front-end sees
611 // the rotating IP only on positives).
612 op_headers.clone(),
613 )
614 .await,
615 );
616 }
617 }
618 }
619 }
620
621 // Round 17.2 — URI-length probe. Spec-agnostic but schema-aware in
622 // spirit: most servers cap URIs at 8 KB or so. Append a 9 KB query
623 // string to the URL and expect 414 URI Too Long (or 400). Skipped
624 // for operations that already have a heavy positive query.
625 {
626 let pad = "p=".to_string() + &"x".repeat(9_000);
627 let bad_url = if url.contains('?') {
628 format!("{url}&{pad}")
629 } else {
630 format!("{url}?{pad}")
631 };
632 negatives.push(
633 send_case(
634 client,
635 config,
636 method.clone(),
637 &bad_url,
638 "parameters:uri-too-long",
639 true,
640 op.sample_body.as_deref(),
641 op.query_params.clone(),
642 // Round 24 (f) — see schema-mutation note above. Use
643 // `op_headers` (carries geo IP) instead of bare
644 // `op.header_params`.
645 op_headers.clone(),
646 )
647 .await,
648 );
649 }
650
651 // (e) Round 16 — path-param type probe. Send the first path
652 // parameter as a literal `"self-test-invalid-id"`: a string that
653 // contains hyphens, won't parse as an integer, won't parse as a
654 // UUID, and won't match any typical regex pattern. Operations
655 // whose spec types the param as `integer` or `string` with a
656 // `format`/`pattern` will catch this (caught: server returned
657 // 4xx); operations whose spec lets path params be free-form
658 // strings will let it through (missed: server returned 2xx).
659 // Either outcome is informative: a category that's all "missed"
660 // tells the user their spec is loose on path-param types, which
661 // is itself worth knowing. Addresses Srikanth's "always all
662 // passing" report — operations with a path param now produce at
663 // least one probe instead of zero.
664 if !op.path_params.is_empty() {
665 let mut url_with_placeholder = op.path.clone();
666 if let Some((first_name, _)) = op.path_params.first() {
667 // Substitute every other path-param with its sample so the
668 // route shape stays intact and only the first param is bad.
669 for (name, value) in op.path_params.iter().skip(1) {
670 if !value.is_empty() {
671 url_with_placeholder =
672 url_with_placeholder.replace(&format!("{{{name}}}"), value);
673 }
674 }
675 // Substitute the first param with a guaranteed-invalid
676 // sentinel that's unlikely to match any reasonable schema:
677 // contains characters disallowed in numeric IDs *and* UUIDs.
678 url_with_placeholder =
679 url_with_placeholder.replace(&format!("{{{first_name}}}"), "self-test-invalid-id");
680 // Round 18.1 — honour `base_path` here too, otherwise the
681 // probe URL differs from the positive case and the
682 // resulting 404 is misattributed to "bad path param".
683 let bad_url = build_url_with_base(
684 &config.target_url,
685 config.base_path.as_deref(),
686 &url_with_placeholder,
687 &[],
688 );
689 negatives.push(
690 send_case(
691 client,
692 config,
693 method.clone(),
694 &bad_url,
695 "parameters:bad-path-param",
696 true,
697 op.sample_body.as_deref(),
698 op.query_params.clone(),
699 op_headers.clone(),
700 )
701 .await,
702 );
703 }
704 }
705
706 // (c) drop the first required query param
707 if !op.query_params.is_empty() {
708 let mut q = op.query_params.clone();
709 q.remove(0);
710 negatives.push(
711 send_case(
712 client,
713 config,
714 method.clone(),
715 &url,
716 "parameters:missing-query",
717 true,
718 op.sample_body.as_deref(),
719 q,
720 op_headers.clone(),
721 )
722 .await,
723 );
724 }
725
726 // (s) Round 17.3 — security probes.
727 //
728 // Operations whose spec declares a security requirement get a
729 // dedicated set of negatives. The point isn't to test whether the
730 // server's *real* auth works (the positive case already does that
731 // via `extra_headers`) — it's to check whether deliberately-bad
732 // credentials are still rejected, which is exactly the failure
733 // mode that lets an attacker through a half-wired validator.
734 //
735 // Each probe replaces or omits the relevant auth credential and
736 // expects 401 / 403. A 2xx here is a hard finding: "spec says
737 // this endpoint is protected, server let unauthenticated /
738 // wrong-credential traffic through".
739 //
740 // Bounded: at most one probe per declared scheme kind, so an
741 // operation with 3 security requirements doesn't 4× the request
742 // volume. Skips entirely when `op.security_schemes` is empty.
743 for probe in build_security_probes(&op.security_schemes) {
744 // Strip any pre-existing Authorization or known API-key
745 // header from extra_headers + header_params so the probe
746 // value is the *only* credential the server sees.
747 let stripped_extra = strip_auth(&config.extra_headers, &op.security_schemes);
748 let stripped_headers = strip_auth(&op.header_params, &op.security_schemes);
749 let stripped_query = strip_auth_query(&op.query_params, &op.security_schemes);
750 let mut req_headers = stripped_headers;
751 for (k, v) in &probe.headers {
752 req_headers.push((k.clone(), v.clone()));
753 }
754 // Round 24 (f) — security probes build req_headers from
755 // `op.header_params` directly (we need the stripped-auth
756 // variant), so the geo-IP header doesn't ride along
757 // automatically. Append it here so a GEODB / WAF in front
758 // of the auth layer still sees the rotating source IP.
759 if let Some(ip) = geo_ip {
760 let ip_str = ip.to_string();
761 for h in &config.geo_source_headers {
762 let already = req_headers.iter().any(|(k, _)| k.eq_ignore_ascii_case(h));
763 if !already {
764 req_headers.push((h.clone(), ip_str.clone()));
765 }
766 }
767 }
768 let mut req_query = stripped_query;
769 for (k, v) in &probe.query {
770 req_query.push((k.clone(), v.clone()));
771 }
772 negatives.push(
773 send_case_with_extra(
774 client,
775 config,
776 method.clone(),
777 &url,
778 &probe.label,
779 true,
780 op.sample_body.as_deref(),
781 req_query,
782 req_headers,
783 stripped_extra,
784 )
785 .await,
786 );
787 }
788
789 // (d) drop the first required header
790 if !op.header_params.is_empty() {
791 // Round 24 (f) — start from `op_headers` (so the geo IP rides
792 // along) and only strip the first OPERATION-declared header.
793 // Slicing past `op.header_params.len()` would otherwise risk
794 // dropping the geo header itself; `op_headers` is built as
795 // `op.header_params ++ geo` so index 0 is always operational.
796 let mut h = op_headers.clone();
797 if !h.is_empty() {
798 h.remove(0);
799 }
800 negatives.push(
801 send_case(
802 client,
803 config,
804 method.clone(),
805 &url,
806 "parameters:missing-header",
807 true,
808 op.sample_body.as_deref(),
809 op.query_params.clone(),
810 h,
811 )
812 .await,
813 );
814 }
815
816 // (w) Round 17.5 — OWASP/WAF unification.
817 //
818 // Pull one canonical payload per OWASP category from the existing
819 // `SecurityPayloads` library and emit an injection probe per
820 // category. Targets in priority order: (1) substitute the first
821 // query param's value, (2) substitute the first string field of
822 // the positive JSON body, (3) skip if neither is available.
823 //
824 // Label format `owasp:<category>`, so the existing
825 // `negative_caught` / `negative_missed` rollup groups all OWASP
826 // findings under one `owasp` bucket. Expected 4xx (server should
827 // reject malicious input). A 5xx is a hard finding (server
828 // crashed on the payload); a 2xx is a soft finding (input passed
829 // through unfiltered — may or may not be a real vuln).
830 //
831 // Bounded: at most one probe per category (7 categories total).
832 // Skips the operation entirely if no injection target is
833 // available — open GET endpoints with no params get zero OWASP
834 // probes, no false signal.
835 for probe in build_owasp_probes(op) {
836 negatives.push(
837 send_case(
838 client,
839 config,
840 method.clone(),
841 &url,
842 &probe.label,
843 true,
844 probe.body.as_deref(),
845 probe.query,
846 // Round 24 (f) — OWASP injection probes must also
847 // carry the geo IP, otherwise a WAF / GEODB rule
848 // tuned to a specific source IP would silently let
849 // them through.
850 op_headers.clone(),
851 )
852 .await,
853 );
854 }
855
856 // Round 25 — response-body shape validation pass. For each capture
857 // this op pushed onto the sink, look up the spec's schema for the
858 // actual response status and validate. Result lands in
859 // `response_schema_error` (Some(message) on failure, None on
860 // pass or no-schema-for-this-status). Runs only when the user
861 // opted in AND capture is on (we need the body).
862 if config.validate_response_schemas {
863 if let (Some(sink), Some(start)) = (config.capture.as_ref(), sink_start) {
864 if !op.response_schemas.is_empty() {
865 if let Ok(mut guard) = sink.lock() {
866 let end = guard.len();
867 for i in start..end {
868 let Some(entry) = guard.get_mut(i) else {
869 continue;
870 };
871 let Some(body) = entry.response_body.as_deref() else {
872 continue;
873 };
874 let Some(schema) = op.response_schemas.get(&entry.response_status) else {
875 continue;
876 };
877 entry.response_schema_error = validate_body_against_schema(body, schema);
878 }
879 }
880 }
881 }
882 }
883
884 OperationResult {
885 method: op.method.clone(),
886 path: op.path.clone(),
887 positive: Some(positive),
888 negatives,
889 }
890}
891
// NOTE: the paragraph below describes `validate_body_against_schema`
// (defined further down in this file), not the function that follows;
// it is kept as a plain comment so rustdoc does not attach it to the
// wrong item.
//
// Round 25 — validate a JSON body string against an OpenAPI response
// schema (already converted to a `serde_json::Value`). Returns
// `Some(message)` describing the first violation, or `None` on a
// clean pass / non-JSON body / schema-build failure (in which case
// the absence of an error means "we didn't have anything to compare
// against", not "passed"; the caller-side semantics treat absence as
// success because that's what the user sees as silence).
899/// Round 27 (k variant b) — return a JSON body string identical to
900/// `sample` except that the first string-valued leaf has been
901/// replaced with `snippet`. Walks objects depth-first and stops at
902/// the first string. If `sample` is not parseable JSON, or has no
903/// string fields, falls back to wrapping the snippet under a `data`
904/// key so the probe still has a body to send: `{"data": <snippet>}`.
905/// The result is always valid JSON ready for `application/json`.
906fn embed_payload_in_first_string_field(sample: &str, snippet: &str) -> String {
907 let mut parsed: serde_json::Value = match serde_json::from_str(sample) {
908 Ok(v) => v,
909 Err(_) => return format!(r#"{{"data":{}}}"#, json_quote(snippet)),
910 };
911 if !replace_first_string(&mut parsed, snippet) {
912 return format!(r#"{{"data":{}}}"#, json_quote(snippet));
913 }
914 serde_json::to_string(&parsed)
915 .unwrap_or_else(|_| format!(r#"{{"data":{}}}"#, json_quote(snippet)))
916}
917
918/// Helper for `embed_payload_in_first_string_field`: recursively
919/// walk the value and replace the FIRST string leaf encountered.
920/// Returns true when a replacement happened. Honors document order
921/// for objects (BTreeMap-backed `serde_json::Map` iterates in
922/// insertion order) so the choice of which field to mutate is
923/// stable across runs.
924fn replace_first_string(v: &mut serde_json::Value, snippet: &str) -> bool {
925 match v {
926 serde_json::Value::String(s) => {
927 *s = snippet.to_string();
928 true
929 }
930 serde_json::Value::Object(map) => {
931 for (_k, child) in map.iter_mut() {
932 if replace_first_string(child, snippet) {
933 return true;
934 }
935 }
936 false
937 }
938 serde_json::Value::Array(arr) => {
939 for child in arr.iter_mut() {
940 if replace_first_string(child, snippet) {
941 return true;
942 }
943 }
944 false
945 }
946 _ => false,
947 }
948}
949
950/// Helper for `embed_payload_in_first_string_field`'s fallback: take
951/// an arbitrary string and quote it for embedding inside a JSON
952/// literal. `serde_json::to_string(&value)` handles escaping
953/// correctly for unicode + control chars + quotes.
954fn json_quote(s: &str) -> String {
955 serde_json::to_string(s).unwrap_or_else(|_| "\"\"".to_string())
956}
957
958fn validate_body_against_schema(body: &str, schema: &serde_json::Value) -> Option<String> {
959 let parsed: serde_json::Value = serde_json::from_str(body).ok()?;
960 let validator = jsonschema::validator_for(schema).ok()?;
961 let mut errors = validator.iter_errors(&parsed);
962 let first = errors.next()?;
963 // Round 28 — Srikanth on 0.3.170 wanted the message to show the
964 // actual expected schema alongside the kind label so it reads as
965 // "expected schema {...} but got <kind>". We emit a compact JSON
966 // serialisation of the schema as a suffix; the kind label still
967 // names what went wrong in plain English for quick scanning.
968 // Round 26 — Srikanth on 0.3.169: the prior `format!("{:?}", first.kind)
969 // .split('(').next()` produced "Type { kind: Single" (broken Rust
970 // syntax, mismatched braces). Switch to the human-readable mapping
971 // already used in executor.rs: handle the common kinds (Type,
972 // Required, AdditionalProperties, Enum, MinLength, MaxLength,
973 // Minimum, Maximum, Pattern) explicitly; fall back to the
974 // jsonschema crate's Display impl on the error (which produces
975 // something like "{...} is not of type \"string\"") for the long
976 // tail. Combined with `at <instance-path>` for the field location.
977 let path = first.instance_path.to_string();
978 let path = if path.is_empty() { "/" } else { path.as_str() };
979 let kind_msg: String = match &first.kind {
980 jsonschema::error::ValidationErrorKind::Type { kind } => {
981 // `kind` is `TypeKind::Single(JsonType)` or
982 // `TypeKind::Multiple(JsonTypeSet)`. `JsonType` has its
983 // own `Display` impl ("string", "object", etc.).
984 match kind {
985 jsonschema::error::TypeKind::Single(t) => format!("expected type {t}"),
986 jsonschema::error::TypeKind::Multiple(_) => "expected one of multiple types".into(),
987 }
988 }
989 jsonschema::error::ValidationErrorKind::Required { property } => {
990 format!("required field missing: {property}")
991 }
992 jsonschema::error::ValidationErrorKind::AdditionalProperties { unexpected } => {
993 format!("unexpected additional properties: {unexpected:?}")
994 }
995 jsonschema::error::ValidationErrorKind::Enum { options } => {
996 format!("value not in allowed enum: {options}")
997 }
998 jsonschema::error::ValidationErrorKind::MinLength { limit } => {
999 format!("string shorter than min length ({limit})")
1000 }
1001 jsonschema::error::ValidationErrorKind::MaxLength { limit } => {
1002 format!("string longer than max length ({limit})")
1003 }
1004 jsonschema::error::ValidationErrorKind::Minimum { limit } => {
1005 format!("value below minimum ({limit})")
1006 }
1007 jsonschema::error::ValidationErrorKind::Maximum { limit } => {
1008 format!("value above maximum ({limit})")
1009 }
1010 jsonschema::error::ValidationErrorKind::Pattern { pattern } => {
1011 format!("value did not match pattern {pattern}")
1012 }
1013 // Long tail: lean on jsonschema's Display impl, which is the
1014 // built-in human-readable error message ("X is not of type Y").
1015 // Strip trailing newlines so the JSONL line stays one line.
1016 _ => first.to_string().trim().to_string(),
1017 };
1018 // Round 30 — Srikanth on 0.3.173 asked how a deeper nested mismatch
1019 // reads. The prior output printed the WHOLE top-level schema even for
1020 // a single-field mismatch, which buried the actual constraint that
1021 // failed. Walk the instance pointer through the schema's properties
1022 // chain and print the most specific sub-schema we can find. Falls
1023 // back to the full schema for paths the walker can't resolve
1024 // (additionalProperties, oneOf, allOf, $ref un-resolved, etc.).
1025 let focused_schema = sub_schema_at_pointer(schema, path).unwrap_or_else(|| schema.clone());
1026 let schema_str = serde_json::to_string(&focused_schema).unwrap_or_else(|_| "<schema>".into());
1027 let schema_str = if schema_str.len() > 300 {
1028 format!("{}...", &schema_str[..300])
1029 } else {
1030 schema_str
1031 };
1032 // Round 29 — Srikanth on 0.3.172 was confused by `at /:` thinking
1033 // it referenced the URL path; it's actually a JSON pointer into
1034 // the RESPONSE BODY. Reword so that's unambiguous: explicit
1035 // "response body" prefix and a human label for the root case.
1036 let location = if path == "/" {
1037 "response body root".to_string()
1038 } else {
1039 format!("response body at {path}")
1040 };
1041 Some(format!("{location}: {kind_msg}; expected schema {schema_str}"))
1042}
1043
1044/// Round 30 — walk a JSON-Pointer-style instance path through a JSON
1045/// Schema and return the sub-schema describing the value at that
1046/// position. For path `/name/age` on
1047/// `{"properties":{"name":{"properties":{"age":{"type":"integer"}}}}}`
1048/// returns `{"type":"integer"}`. Returns `None` for paths the walker
1049/// can't follow (array indices into `items` with no per-index schema,
1050/// `additionalProperties`, `oneOf`/`allOf`, unresolved `$ref`); callers
1051/// should fall back to the full schema in that case.
1052fn sub_schema_at_pointer(schema: &serde_json::Value, pointer: &str) -> Option<serde_json::Value> {
1053 if pointer.is_empty() || pointer == "/" {
1054 return Some(schema.clone());
1055 }
1056 let mut current = schema;
1057 for seg in pointer.trim_start_matches('/').split('/') {
1058 let unescaped = seg.replace("~1", "/").replace("~0", "~");
1059 if let Some(props) = current.get("properties") {
1060 if let Some(sub) = props.get(&unescaped) {
1061 current = sub;
1062 continue;
1063 }
1064 }
1065 if let Some(items) = current.get("items") {
1066 if items.is_object() {
1067 current = items;
1068 continue;
1069 }
1070 }
1071 return None;
1072 }
1073 Some(current.clone())
1074}
1075
/// Round 17.5 — one OWASP injection probe to send.
#[derive(Debug, Clone)]
struct OwaspProbe {
    /// Self-test label, formatted as `owasp:<category>`.
    label: String,
    /// Request body to send with the probe, if any.
    body: Option<String>,
    /// Query parameters to send with the probe.
    query: Vec<(String, String)>,
}
1083
1084/// Build one OWASP probe per `SecurityCategory` for `op`. Targets the
1085/// first query param if any, else the first string field of the
1086/// positive JSON body. Returns empty if neither target is available.
1087fn build_owasp_probes(op: &AnnotatedOperation) -> Vec<OwaspProbe> {
1088 use crate::security_payloads::{SecurityCategory, SecurityPayloads};
1089
1090 let categories = [
1091 SecurityCategory::SqlInjection,
1092 SecurityCategory::Xss,
1093 SecurityCategory::CommandInjection,
1094 SecurityCategory::PathTraversal,
1095 SecurityCategory::Ssti,
1096 SecurityCategory::LdapInjection,
1097 SecurityCategory::Xxe,
1098 ];
1099
1100 // Pick an injection target ONCE per operation; reuse it across
1101 // categories. (A single op gets up to 7 probes — one per category
1102 // — all attacking the same field.)
1103 let injection_target = pick_injection_target(op);
1104 let Some(target) = injection_target else {
1105 return Vec::new();
1106 };
1107
1108 let mut probes = Vec::new();
1109 for cat in categories {
1110 // Take the *first* payload from each category. The
1111 // collection's first entry is the canonical low-risk
1112 // representative; later entries include time-based / blind
1113 // probes that aren't useful as a one-shot rejection test.
1114 let Some(payload) = SecurityPayloads::get_by_category(cat).into_iter().next() else {
1115 continue;
1116 };
1117 let mut query = op.query_params.clone();
1118 let mut body = op.sample_body.clone();
1119 match &target {
1120 InjectionTarget::Query(idx) => {
1121 if let Some(slot) = query.get_mut(*idx) {
1122 slot.1 = payload.payload.clone();
1123 }
1124 }
1125 InjectionTarget::BodyStringField(field) => {
1126 body = inject_into_body_field(body.as_deref(), field, &payload.payload);
1127 }
1128 }
1129 probes.push(OwaspProbe {
1130 label: format!("owasp:{}", cat),
1131 body,
1132 query,
1133 });
1134 }
1135 probes
1136}
1137
/// Where an OWASP payload is injected for a given operation.
#[derive(Debug, Clone)]
enum InjectionTarget {
    /// Index into the operation's `query_params`; that entry's value is
    /// replaced with the payload.
    Query(usize),
    /// Name of a top-level string field in the JSON sample body whose
    /// value is replaced with the payload.
    BodyStringField(String),
}
1143
1144fn pick_injection_target(op: &AnnotatedOperation) -> Option<InjectionTarget> {
1145 if !op.query_params.is_empty() {
1146 return Some(InjectionTarget::Query(0));
1147 }
1148 let sample = op.sample_body.as_deref()?;
1149 let parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
1150 let obj = parsed.as_object()?;
1151 for (k, v) in obj {
1152 if v.is_string() {
1153 return Some(InjectionTarget::BodyStringField(k.clone()));
1154 }
1155 }
1156 None
1157}
1158
1159/// Replace the value of `field` in a JSON-object body with `payload`.
1160/// Returns the mutated body as a JSON string. Returns `None` if the
1161/// body doesn't parse as a JSON object.
1162fn inject_into_body_field(body: Option<&str>, field: &str, payload: &str) -> Option<String> {
1163 let raw = body?;
1164 let mut parsed: serde_json::Value = serde_json::from_str(raw).ok()?;
1165 let obj = parsed.as_object_mut()?;
1166 obj.insert(field.to_string(), serde_json::json!(payload));
1167 serde_json::to_string(&parsed).ok()
1168}
1169
/// Round 17.3 — one synthesised bad credential to send.
///
/// A probe carries only the credential material; the caller combines
/// it with the rest of the request.
#[derive(Debug, Clone)]
struct SecurityProbe {
    /// Self-test label, e.g. `security:bad-bearer`.
    label: String,
    /// Headers to attach to the probe request.
    headers: Vec<(String, String)>,
    /// Query parameters to attach (API key in query case).
    query: Vec<(String, String)>,
}
1181
1182/// For each declared security scheme, produce one bad-credential
1183/// probe plus a single "no auth at all" probe that exercises the
1184/// missing-credential code path. Deduplicates by scheme kind so an
1185/// operation declaring `[bearer, bearer]` only yields one Bearer
1186/// probe.
1187fn build_security_probes(schemes: &[SecuritySchemeInfo]) -> Vec<SecurityProbe> {
1188 if schemes.is_empty() {
1189 return Vec::new();
1190 }
1191 let mut probes: Vec<SecurityProbe> = Vec::new();
1192 let mut seen_bearer = false;
1193 let mut seen_basic = false;
1194 // `(loc_tag, name)` — ApiKeyLocation doesn't implement Ord, so
1195 // we tag it with a short discriminant string for dedup.
1196 let mut seen_apikey: std::collections::BTreeSet<(&'static str, String)> = Default::default();
1197 for s in schemes {
1198 match s {
1199 SecuritySchemeInfo::Bearer if !seen_bearer => {
1200 seen_bearer = true;
1201 probes.push(SecurityProbe {
1202 label: "security:bad-bearer".into(),
1203 headers: vec![(
1204 "Authorization".into(),
1205 "Bearer self-test-invalid-token".into(),
1206 )],
1207 query: Vec::new(),
1208 });
1209 }
1210 SecuritySchemeInfo::Basic if !seen_basic => {
1211 seen_basic = true;
1212 // base64("self-test:invalid") — valid base64, wrong creds.
1213 probes.push(SecurityProbe {
1214 label: "security:bad-basic".into(),
1215 headers: vec![(
1216 "Authorization".into(),
1217 "Basic c2VsZi10ZXN0OmludmFsaWQ=".into(),
1218 )],
1219 query: Vec::new(),
1220 });
1221 }
1222 SecuritySchemeInfo::ApiKey { location, name } => {
1223 let loc_tag = match location {
1224 ApiKeyLocation::Header => "header",
1225 ApiKeyLocation::Query => "query",
1226 ApiKeyLocation::Cookie => "cookie",
1227 };
1228 if seen_apikey.contains(&(loc_tag, name.clone())) {
1229 continue;
1230 }
1231 seen_apikey.insert((loc_tag, name.clone()));
1232 let label = format!("security:bad-apikey:{}", name);
1233 let bad = "self-test-invalid-key".to_string();
1234 match location {
1235 ApiKeyLocation::Header => probes.push(SecurityProbe {
1236 label,
1237 headers: vec![(name.clone(), bad)],
1238 query: Vec::new(),
1239 }),
1240 ApiKeyLocation::Query => probes.push(SecurityProbe {
1241 label,
1242 headers: Vec::new(),
1243 query: vec![(name.clone(), bad)],
1244 }),
1245 ApiKeyLocation::Cookie => probes.push(SecurityProbe {
1246 label,
1247 headers: vec![("Cookie".into(), format!("{}={}", name, bad))],
1248 query: Vec::new(),
1249 }),
1250 }
1251 }
1252 _ => {}
1253 }
1254 }
1255 // Always add a "no auth at all" probe when *any* security scheme
1256 // is declared — useful even if all schemes failed to resolve to a
1257 // testable kind, because it surfaces validators that aren't
1258 // checking auth presence at all.
1259 probes.push(SecurityProbe {
1260 label: "security:no-auth".into(),
1261 headers: Vec::new(),
1262 query: Vec::new(),
1263 });
1264 probes
1265}
1266
1267/// Remove Authorization and any API-key headers declared by the
1268/// operation's security schemes from `headers`, so a security probe
1269/// can supply its own credential (or none) cleanly.
1270fn strip_auth(
1271 headers: &[(String, String)],
1272 schemes: &[SecuritySchemeInfo],
1273) -> Vec<(String, String)> {
1274 let mut apikey_headers: std::collections::BTreeSet<String> = Default::default();
1275 for s in schemes {
1276 if let SecuritySchemeInfo::ApiKey {
1277 location: ApiKeyLocation::Header,
1278 name,
1279 } = s
1280 {
1281 apikey_headers.insert(name.to_lowercase());
1282 }
1283 if let SecuritySchemeInfo::ApiKey {
1284 location: ApiKeyLocation::Cookie,
1285 ..
1286 } = s
1287 {
1288 apikey_headers.insert("cookie".into());
1289 }
1290 }
1291 headers
1292 .iter()
1293 .filter(|(k, _)| {
1294 let lk = k.to_lowercase();
1295 lk != "authorization" && !apikey_headers.contains(&lk)
1296 })
1297 .cloned()
1298 .collect()
1299}
1300
1301/// Remove API-key query parameters declared by the operation's
1302/// security schemes from `query`, so a probe can supply its own.
1303fn strip_auth_query(
1304 query: &[(String, String)],
1305 schemes: &[SecuritySchemeInfo],
1306) -> Vec<(String, String)> {
1307 let mut apikey_query: std::collections::BTreeSet<String> = Default::default();
1308 for s in schemes {
1309 if let SecuritySchemeInfo::ApiKey {
1310 location: ApiKeyLocation::Query,
1311 name,
1312 } = s
1313 {
1314 apikey_query.insert(name.clone());
1315 }
1316 }
1317 query.iter().filter(|(k, _)| !apikey_query.contains(k)).cloned().collect()
1318}
1319
1320/// Variant of `send_case` that takes an explicit `extra_headers`
1321/// (rather than reading them from `config`). Used by security probes
1322/// to substitute or strip the configured Authorization header.
1323#[allow(clippy::too_many_arguments)]
1324async fn send_case_with_extra(
1325 client: &Client,
1326 config: &SelfTestConfig,
1327 method: Method,
1328 url: &str,
1329 label: &str,
1330 expected_4xx: bool,
1331 body: Option<&str>,
1332 query: Vec<(String, String)>,
1333 headers: Vec<(String, String)>,
1334 extra_headers: Vec<(String, String)>,
1335) -> CaseOutcome {
1336 let mut req = client.request(method.clone(), url);
1337 let mut capture_headers: BTreeMap<String, String> = BTreeMap::new();
1338 for (k, v) in &query {
1339 req = req.query(&[(k.as_str(), v.as_str())]);
1340 }
1341 // Round 28 — reqwest's `.header(k, v)` APPENDS rather than replaces
1342 // (.headers().insert() would replace but isn't on the builder).
1343 // The previous round-25 fix relied on "last-write-wins" semantics
1344 // that don't exist; for content-type-swap probes the request went
1345 // out with BOTH `Content-Type: application/json` AND `Content-Type:
1346 // application/xml`, and axum's `Json<>` extractor picked the JSON
1347 // one and accepted, so the server-side validator never saw the
1348 // mismatch. Build a `HeaderMap` ourselves so the override
1349 // replaces the body-block default exactly once.
1350 let mut final_headers: reqwest::header::HeaderMap = reqwest::header::HeaderMap::new();
1351 if let Some(_b) = body {
1352 if let Ok(v) = reqwest::header::HeaderValue::from_str("application/json") {
1353 final_headers.insert(reqwest::header::CONTENT_TYPE, v);
1354 }
1355 capture_headers.insert("Content-Type".to_string(), "application/json".to_string());
1356 }
1357 for (k, v) in &headers {
1358 if let (Ok(hn), Ok(hv)) = (
1359 reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1360 reqwest::header::HeaderValue::from_str(v),
1361 ) {
1362 final_headers.insert(hn, hv);
1363 }
1364 capture_headers.insert(k.clone(), v.clone());
1365 }
1366 for (k, v) in &extra_headers {
1367 if let (Ok(hn), Ok(hv)) = (
1368 reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1369 reqwest::header::HeaderValue::from_str(v),
1370 ) {
1371 final_headers.insert(hn, hv);
1372 }
1373 capture_headers.insert(k.clone(), v.clone());
1374 }
1375 if let Some(b) = body {
1376 req = req.body(b.to_string());
1377 }
1378 req = req.headers(final_headers);
1379 let (actual_status, response_capture) = match req.send().await {
1380 Ok(resp) => {
1381 let status = resp.status().as_u16();
1382 if let Some(sink) = &config.capture {
1383 let resp_headers: BTreeMap<String, String> = resp
1384 .headers()
1385 .iter()
1386 .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string()))
1387 .collect();
1388 let text = resp.text().await.unwrap_or_default();
1389 let (rb, truncated) = truncate_body_for_capture(&text);
1390 (status, Some((Some((rb, truncated)), resp_headers, None, sink.clone())))
1391 } else {
1392 (status, None)
1393 }
1394 }
1395 Err(e) => {
1396 let err_str = e.to_string();
1397 if let Some(sink) = &config.capture {
1398 (0, Some((None, BTreeMap::new(), Some(err_str), sink.clone())))
1399 } else {
1400 (0, None)
1401 }
1402 }
1403 };
1404 let passed = if expected_4xx {
1405 (400..500).contains(&actual_status)
1406 } else {
1407 (200..400).contains(&actual_status)
1408 };
1409 if let Some((resp_body, resp_headers, error, sink)) = response_capture {
1410 let (request_body, request_body_truncated) = match body {
1411 Some(b) => {
1412 let (rb, t) = truncate_body_for_capture(b);
1413 (Some(rb), t)
1414 }
1415 None => (None, false),
1416 };
1417 let (response_body, response_body_truncated) = match resp_body {
1418 Some((rb, t)) => (Some(rb), t),
1419 None => (None, false),
1420 };
1421 let entry = CaseCapture {
1422 label: label.to_string(),
1423 method: method.to_string(),
1424 url: build_query_url(url, &query),
1425 request_headers: capture_headers,
1426 request_body,
1427 request_body_truncated,
1428 response_status: actual_status,
1429 response_headers: resp_headers,
1430 response_body,
1431 response_body_truncated,
1432 error,
1433 // Filled in by the per-operation validation pass after
1434 // every probe finishes; the capture itself is unaware of
1435 // the schema map.
1436 response_schema_error: None,
1437 // Round 28 — derive the expected range from the probe's
1438 // `expected_4xx` flag so the JSONL line and HTML viewer
1439 // can show mismatches without re-deriving on the read side.
1440 expected_status_range: if expected_4xx {
1441 "4xx".into()
1442 } else {
1443 "2xx-3xx".into()
1444 },
1445 };
1446 if let Ok(mut guard) = sink.lock() {
1447 guard.push(entry);
1448 }
1449 }
1450 CaseOutcome {
1451 label: label.to_string(),
1452 expected_4xx,
1453 actual_status,
1454 passed,
1455 }
1456}
1457
1458// HTTP request shape needs all of these: client, config (for capture
1459// sink + extra headers), method, url, label (probe id), expected_4xx
1460// (pass/fail decision), body, query, headers. A struct wrapper would
1461// just move the arity from positional to field access without making
1462// the call sites clearer.
1463#[allow(clippy::too_many_arguments)]
1464async fn send_case(
1465 client: &Client,
1466 config: &SelfTestConfig,
1467 method: Method,
1468 url: &str,
1469 label: &str,
1470 expected_4xx: bool,
1471 body: Option<&str>,
1472 query: Vec<(String, String)>,
1473 headers: Vec<(String, String)>,
1474) -> CaseOutcome {
1475 // Forwarding to `send_case_with_extra` keeps the capture logic in
1476 // one place so request/response tracing can't drift between the
1477 // two entrypoints.
1478 send_case_with_extra(
1479 client,
1480 config,
1481 method,
1482 url,
1483 label,
1484 expected_4xx,
1485 body,
1486 query,
1487 headers,
1488 config.extra_headers.clone(),
1489 )
1490 .await
1491}
1492
1493/// Round 23 (c-iii) — rebuild the query-stringified URL for capture so
1494/// the JSONL trace shows the URL that actually went over the wire
1495/// (reqwest applies `.query(..)` after the request URL string is
1496/// rendered, so capturing the raw `url` argument alone loses the
1497/// query params).
1498fn build_query_url(base: &str, query: &[(String, String)]) -> String {
1499 if query.is_empty() {
1500 return base.to_string();
1501 }
1502 let qs: String = query
1503 .iter()
1504 .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
1505 .collect::<Vec<_>>()
1506 .join("&");
1507 if base.contains('?') {
1508 format!("{base}&{qs}")
1509 } else {
1510 format!("{base}?{qs}")
1511 }
1512}
1513
1514/// Substitute `{param}` placeholders in the spec path with their
1515/// sample values from `path_params`, then prepend `target_url`. Empty
1516/// values are kept as `{param}` so an upstream router still matches
1517/// the template — useful when `path_params` is empty and we want to
1518/// hit the same route the spec defines.
1519///
1520/// All current call sites went through `build_url_with_base` after
1521/// round 18.1, so this no-base-path helper is unused; keep it as the
1522/// documented shim for future external callers (one-arg simplification).
1523#[allow(dead_code)]
1524fn build_url(target: &str, path_template: &str, path_params: &[(String, String)]) -> String {
1525 build_url_with_base(target, None, path_template, path_params)
1526}
1527
/// Round 18.1 — variant of `build_url` that takes a `base_path`
/// (e.g. `Some("/api")`). When set, it is inserted between `target`
/// and the spec path, so a spec declaring `/users` against a target
/// served behind `/api` resolves to `<target>/api/users`.
///
/// Normalisation:
/// - `{name}` placeholders in `path_template` are replaced by the
///   matching `path_params` value; a parameter with an empty value
///   keeps its placeholder.
/// - The spec path gets a leading `/` if it lacks one.
/// - Trailing `/` on `target` is stripped.
/// - `base_path` has leading and trailing `/` stripped and exactly one
///   leading `/` re-added. A base path that is empty or consists only
///   of slashes (e.g. `"/"`) contributes no prefix, so the result never
///   contains `//` at the join.
fn build_url_with_base(
    target: &str,
    base_path: Option<&str>,
    path_template: &str,
    path_params: &[(String, String)],
) -> String {
    let mut path = path_template.to_string();
    for (name, value) in path_params {
        // An empty sample leaves `{name}` in place on purpose.
        if !value.is_empty() {
            path = path.replace(&format!("{{{}}}", name), value);
        }
    }
    if !path.starts_with('/') {
        path.insert(0, '/');
    }

    let target = target.trim_end_matches('/');
    // Trim slashes on both ends *before* testing for emptiness: the
    // root base path "/" must yield no prefix rather than a bare "/"
    // that would produce `<target>//<path>`.
    let prefix = match base_path.map(|bp| bp.trim_matches('/')) {
        Some(segment) if !segment.is_empty() => format!("/{segment}"),
        _ => String::new(),
    };
    format!("{target}{prefix}{path}")
}
1565
1566#[cfg(test)]
1567mod tests {
1568 use super::*;
1569
1570 fn op(
1571 method: &str,
1572 path: &str,
1573 body: Option<&str>,
1574 query: Vec<(&str, &str)>,
1575 headers: Vec<(&str, &str)>,
1576 path_params: Vec<(&str, &str)>,
1577 ) -> AnnotatedOperation {
1578 AnnotatedOperation {
1579 method: method.into(),
1580 path: path.into(),
1581 features: Vec::new(),
1582 request_body_content_type: body.map(|_| "application/json".into()),
1583 sample_body: body.map(|s| s.to_string()),
1584 query_params: query.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1585 header_params: headers.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1586 path_params: path_params.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1587 response_schema: None,
1588 response_schemas: std::collections::BTreeMap::new(),
1589 request_body_schema: None,
1590 security_schemes: Vec::new(),
1591 }
1592 }
1593
1594 #[test]
1595 fn build_url_substitutes_path_params() {
1596 let url = build_url(
1597 "https://api.test/",
1598 "/users/{id}/posts/{pid}",
1599 &[("id".into(), "42".into()), ("pid".into(), "7".into())],
1600 );
1601 assert_eq!(url, "https://api.test/users/42/posts/7");
1602 }
1603
1604 /// Round 18.1 — a run where every positive 404s should be flagged
1605 /// as a likely target misconfiguration, not silently treated as a
1606 /// successful conformance run.
1607 #[test]
1608 fn detect_target_misconfiguration_when_all_positives_share_status() {
1609 let mut report = SelfTestReport {
1610 positive_pass: 0,
1611 positive_fail: 50,
1612 ..Default::default()
1613 };
1614 for i in 0..50 {
1615 report.operations.push(OperationResult {
1616 method: "GET".into(),
1617 path: format!("/r/{i}"),
1618 positive: Some(CaseOutcome {
1619 label: "positive".into(),
1620 expected_4xx: false,
1621 actual_status: 404,
1622 passed: false,
1623 }),
1624 negatives: Vec::new(),
1625 });
1626 }
1627 assert_eq!(report.detect_target_misconfiguration(), Some(404));
1628 }
1629
1630 #[test]
1631 fn detect_target_misconfiguration_returns_none_when_some_pass() {
1632 let mut report = SelfTestReport {
1633 positive_pass: 5,
1634 positive_fail: 50,
1635 ..Default::default()
1636 };
1637 for i in 0..55 {
1638 report.operations.push(OperationResult {
1639 method: "GET".into(),
1640 path: format!("/r/{i}"),
1641 positive: Some(CaseOutcome {
1642 label: "positive".into(),
1643 expected_4xx: false,
1644 actual_status: if i < 5 { 200 } else { 404 },
1645 passed: i < 5,
1646 }),
1647 negatives: Vec::new(),
1648 });
1649 }
1650 assert_eq!(report.detect_target_misconfiguration(), None);
1651 }
1652
1653 /// Round 18.1 — `--base-path /api` should prepend `/api` to
1654 /// every spec path. Pre-fix, the self-test ignored base_path and
1655 /// 404'd every positive when the deployed API was behind a path
1656 /// prefix.
1657 #[test]
1658 fn build_url_applies_base_path_when_present() {
1659 let url = build_url_with_base(
1660 "https://api.example.com",
1661 Some("/api"),
1662 "/users/{id}",
1663 &[("id".into(), "42".into())],
1664 );
1665 assert_eq!(url, "https://api.example.com/api/users/42");
1666 }
1667
1668 /// Round 18.1 — base_path is normalised: missing leading slash
1669 /// gets one added, trailing slash is stripped, empty string is
1670 /// the same as None.
1671 #[test]
1672 fn build_url_normalises_base_path() {
1673 let no_slash = build_url_with_base("https://t", Some("api"), "/x", &[]);
1674 assert_eq!(no_slash, "https://t/api/x");
1675 let trailing = build_url_with_base("https://t", Some("/api/"), "/x", &[]);
1676 assert_eq!(trailing, "https://t/api/x");
1677 let empty = build_url_with_base("https://t", Some(""), "/x", &[]);
1678 assert_eq!(empty, "https://t/x");
1679 let none = build_url_with_base("https://t", None, "/x", &[]);
1680 assert_eq!(none, "https://t/x");
1681 }
1682
1683 #[test]
1684 fn build_url_keeps_placeholders_when_no_sample() {
1685 let url = build_url("https://api.test", "/users/{id}", &[]);
1686 assert_eq!(url, "https://api.test/users/{id}");
1687 }
1688
1689 #[test]
1690 fn report_summary_calls_out_misses() {
1691 let r = SelfTestReport {
1692 positive_pass: 3,
1693 positive_fail: 0,
1694 negative_caught: BTreeMap::from([("request-body".into(), 2)]),
1695 negative_missed: BTreeMap::from([("request-body".into(), 1)]),
1696 operations: Vec::new(),
1697 };
1698 let summary = r.render_summary();
1699 assert!(summary.contains("Positives: 3 pass / 0 fail"));
1700 assert!(summary.contains("Negatives [request-body]: 2 caught / 1 missed"));
1701 assert!(summary.contains("⚠"));
1702 assert!(!r.all_passed());
1703 }
1704
1705 #[test]
1706 fn report_all_passed_when_no_miss() {
1707 let r = SelfTestReport {
1708 positive_pass: 5,
1709 positive_fail: 0,
1710 negative_caught: BTreeMap::from([("parameters".into(), 3)]),
1711 negative_missed: BTreeMap::new(),
1712 operations: Vec::new(),
1713 };
1714 assert!(r.all_passed());
1715 assert!(r.render_summary().contains("✓"));
1716 }
1717
1718 #[tokio::test]
1719 async fn run_self_test_against_unreachable_target_marks_all_failed() {
1720 // Use an obviously-dead port so we exercise the timeout/error
1721 // path without needing a live server in tests.
1722 let cfg = SelfTestConfig {
1723 target_url: "http://127.0.0.1:1".into(),
1724 timeout: Duration::from_millis(200),
1725 ..Default::default()
1726 };
1727 let ops = vec![op(
1728 "POST",
1729 "/users",
1730 Some("{\"name\":\"a\"}"),
1731 vec![],
1732 vec![],
1733 vec![],
1734 )];
1735 let report = run_self_test(&ops, &cfg).await.expect("client builds");
1736 // All cases hit the connect-error path → actual_status=0.
1737 // Positive expects 2xx-3xx → 0 is fail. Negatives expect 4xx
1738 // → 0 is also fail (we missed catching).
1739 assert_eq!(report.positive_fail, 1);
1740 assert!(report.negative_missed.values().sum::<usize>() >= 1);
1741 assert!(!report.all_passed());
1742 }
1743
1744 /// Round 17.2 — operations with both a positive sample AND a
1745 /// resolved request-body schema produce schema-driven negatives
1746 /// in addition to the spec-agnostic empty/wrong-type ones. The
1747 /// labels carry the field path so a per-category report can tell
1748 /// you exactly which field caught.
1749 #[tokio::test]
1750 async fn schema_driven_negatives_fire_when_schema_present() {
1751 use openapiv3::{ObjectType, ReferenceOr, Schema, SchemaData, SchemaKind, Type};
1752 let cfg = SelfTestConfig {
1753 target_url: "http://127.0.0.1:1".into(),
1754 timeout: Duration::from_millis(200),
1755 ..Default::default()
1756 };
1757 // Build an operation whose schema has a required `name` string
1758 // and an `age` integer. The mutator should produce, at
1759 // minimum: required-removed:name, required-removed:age,
1760 // type-mismatch:name, type-mismatch:age, integer-as-float:age,
1761 // plus the root-level type-mismatch.
1762 let mut obj = ObjectType::default();
1763 obj.properties.insert(
1764 "name".to_string(),
1765 ReferenceOr::Item(Box::new(Schema {
1766 schema_data: SchemaData::default(),
1767 schema_kind: SchemaKind::Type(Type::String(Default::default())),
1768 })),
1769 );
1770 obj.properties.insert(
1771 "age".to_string(),
1772 ReferenceOr::Item(Box::new(Schema {
1773 schema_data: SchemaData::default(),
1774 schema_kind: SchemaKind::Type(Type::Integer(Default::default())),
1775 })),
1776 );
1777 obj.required = vec!["name".into(), "age".into()];
1778 let schema = Schema {
1779 schema_data: SchemaData::default(),
1780 schema_kind: SchemaKind::Type(Type::Object(obj)),
1781 };
1782
1783 let mut o =
1784 op("POST", "/users", Some(r#"{"name":"Ada","age":30}"#), vec![], vec![], vec![]);
1785 o.request_body_schema = Some(schema);
1786 let report = run_self_test(&[o], &cfg).await.expect("client builds");
1787 // Bucket labels from the operation result.
1788 let labels: std::collections::BTreeSet<String> = report
1789 .operations
1790 .iter()
1791 .flat_map(|op| op.negatives.iter().map(|n| n.label.clone()))
1792 .collect();
1793 assert!(
1794 labels.iter().any(|l| l.starts_with("request-body:type-mismatch:")),
1795 "missing type-mismatch negative; got {labels:?}"
1796 );
1797 assert!(
1798 labels.iter().any(|l| l.starts_with("request-body:required-removed:")),
1799 "missing required-removed negative; got {labels:?}"
1800 );
1801 assert!(
1802 labels.iter().any(|l| l == "parameters:uri-too-long"),
1803 "missing URI-length negative; got {labels:?}"
1804 );
1805 }
1806
1807 /// Round 16 — operations with a body OR a path-param now produce
1808 /// negatives even without a sample body. Previously a POST whose
1809 /// body annotator failed produced *zero* negatives, so the self-test
1810 /// always reported "all passing" for that endpoint.
1811 #[tokio::test]
1812 async fn no_sample_body_still_produces_request_body_negatives() {
1813 let cfg = SelfTestConfig {
1814 target_url: "http://127.0.0.1:1".into(),
1815 timeout: Duration::from_millis(200),
1816 ..Default::default()
1817 };
1818 // POST with a body content type but no sample (annotator gap).
1819 let ops = vec![op("POST", "/x", None, vec![], vec![], vec![])];
1820 // No sample_body but request_body_content_type set:
1821 let mut ops_fixed = ops;
1822 ops_fixed[0].request_body_content_type = Some("application/json".into());
1823 let report = run_self_test(&ops_fixed, &cfg).await.expect("client builds");
1824 // Both request-body negatives (empty + wrong-type) should fire,
1825 // landing in `negative_missed` because the unreachable target
1826 // returns no 4xx. The point: count > 0.
1827 assert!(
1828 report.negative_missed.values().sum::<usize>() >= 2,
1829 "expected ≥2 request-body negatives, got {:?}",
1830 report.negative_missed
1831 );
1832 }
1833
1834 /// Round 16 — operations with a path-param now get a probe even
1835 /// when there's no body / required query / required header.
1836 /// Previously `/teams/{team-id}` with no other required fields
1837 /// produced zero negatives → always "all passing".
1838 #[tokio::test]
1839 async fn path_param_only_endpoint_produces_a_probe() {
1840 let cfg = SelfTestConfig {
1841 target_url: "http://127.0.0.1:1".into(),
1842 timeout: Duration::from_millis(200),
1843 ..Default::default()
1844 };
1845 let ops = vec![op(
1846 "GET",
1847 "/teams/{team-id}",
1848 None,
1849 vec![],
1850 vec![],
1851 vec![("team-id", "1")],
1852 )];
1853 let report = run_self_test(&ops, &cfg).await.expect("client builds");
1854 let total: usize = report.negative_caught.values().sum::<usize>()
1855 + report.negative_missed.values().sum::<usize>();
1856 assert!(total >= 1, "expected ≥1 path-param probe, got {:?}", report);
1857 }
1858
/// Round 18.5 — with a geo IP supplied, each default forwarded-IP
/// header (X-Forwarded-For, True-Client-IP, CF-Connecting-IP) is
/// appended next to the operation's own headers, carrying that IP.
#[test]
fn effective_op_headers_appends_geo_ip_to_default_headers() {
    let client_ip: IpAddr = "203.0.113.42".parse().unwrap();
    let merged = effective_op_headers(
        &[("Accept".into(), "application/json".into())],
        Some(client_ip),
        &default_geo_source_headers(),
    );

    let has_header = |wanted: &str| merged.iter().any(|(name, _)| name.as_str() == wanted);
    assert!(has_header("Accept"));
    assert!(has_header("X-Forwarded-For"));
    assert!(has_header("True-Client-IP"));
    assert!(has_header("CF-Connecting-IP"));

    // Every header other than the pre-existing Accept carries the IP.
    for (_, value) in merged.iter().filter(|(name, _)| name != "Accept") {
        assert_eq!(value.as_str(), "203.0.113.42");
    }
}
1882
/// Round 18.5 — when the operation already declares a forwarded-IP
/// header (rare but legal; some specs hard-code one), the declared
/// value is kept and the geo IP does not clobber it.
#[test]
fn effective_op_headers_respects_spec_declared_header() {
    let geo_ip: IpAddr = "203.0.113.99".parse().unwrap();
    let merged = effective_op_headers(
        &[("x-forwarded-for".into(), "10.0.0.1".into())],
        Some(geo_ip),
        &["X-Forwarded-For".to_string()],
    );
    // Matching is case-insensitive: the spec's lower-case entry must
    // be the only X-Forwarded-For row, with its original value.
    let forwarded: Vec<&str> = merged
        .iter()
        .filter_map(|(name, value)| {
            name.eq_ignore_ascii_case("x-forwarded-for").then_some(value.as_str())
        })
        .collect();
    assert_eq!(forwarded, vec!["10.0.0.1"]);
}
1903
/// Round 18.5 — the headers come back unchanged when either the geo
/// IP is `None` or the list of header names to populate is empty.
#[test]
fn effective_op_headers_is_a_noop_without_geo_ip() {
    let base = vec![("Accept".into(), "json".into())];

    // No IP: nothing to inject even though header names are given.
    let without_ip = effective_op_headers(&base, None, &default_geo_source_headers());
    assert_eq!(without_ip, base);

    // IP present but no header names: still nothing to inject.
    let some_ip = Some("10.0.0.1".parse::<IpAddr>().unwrap());
    let without_names = effective_op_headers(&base, some_ip, &[]);
    assert_eq!(without_names, base);
}
1914
/// Round 18.5 — an empty `source_ips` yields a single default client;
/// a non-empty list yields one client per IP, each attempting to
/// bind. Real multi-IP binds can't be tested reliably on CI (no
/// loopback aliases), so only a single loopback address, which is
/// always bind-able, is exercised here.
#[test]
fn build_client_pool_one_per_source_ip() {
    let mut cfg = SelfTestConfig {
        target_url: "http://127.0.0.1:1".into(),
        timeout: Duration::from_millis(200),
        ..Default::default()
    };

    // No source IPs configured → exactly one default client.
    let default_pool = build_client_pool(&cfg).expect("default builds");
    assert_eq!(default_pool.len(), 1);

    // One source IP → one client; a loopback bind is portable.
    cfg.source_ips = vec!["127.0.0.1".parse().unwrap()];
    let bound_pool = build_client_pool(&cfg).expect("bind loopback");
    assert_eq!(bound_pool.len(), 1);
}
1932
1933 /// Round 18.5 — geo IPs round-robin across operations. Hits an
1934 /// unreachable target so we can inspect the case outcomes; the
1935 /// point is to confirm `op_headers` carried the geo IP through
1936 /// (CaseOutcome doesn't surface headers directly, so we just
1937 /// verify the run completes without panicking and the result
1938 /// shape is correct when source_ips is non-empty too).
1939 #[tokio::test]
1940 async fn run_self_test_with_geo_source_completes() {
1941 let cfg = SelfTestConfig {
1942 target_url: "http://127.0.0.1:1".into(),
1943 timeout: Duration::from_millis(200),
1944 geo_source_ips: vec![
1945 "203.0.113.1".parse().unwrap(),
1946 "203.0.113.2".parse().unwrap(),
1947 ],
1948 ..Default::default()
1949 };
1950 let ops = vec![
1951 op("GET", "/a", None, vec![], vec![], vec![]),
1952 op("GET", "/b", None, vec![], vec![], vec![]),
1953 op("GET", "/c", None, vec![], vec![], vec![]),
1954 ];
1955 let report = run_self_test(&ops, &cfg).await.expect("client builds");
1956 assert_eq!(report.operations.len(), 3);
1957 }
1958
1959 /// Round 24 (f) — Srikanth saw the geo header on positive probes
1960 /// only; the four negative-probe call sites were passing
1961 /// `op.header_params` directly instead of `op_headers`, so the
1962 /// geo IP got dropped. This test runs a self-test that includes
1963 /// negative probes (uri-too-long, missing-query, etc.) under
1964 /// `--conformance-self-test-capture`, then asserts that EVERY
1965 /// captured probe (positive AND negative) carries one of the
1966 /// configured forwarded-IP headers.
1967 #[tokio::test]
1968 async fn geo_headers_present_on_every_probe_with_capture() {
1969 let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
1970 let cfg = SelfTestConfig {
1971 target_url: "http://127.0.0.1:1".into(),
1972 timeout: Duration::from_millis(50),
1973 geo_source_ips: vec!["203.0.113.5".parse().unwrap()],
1974 capture: Some(sink.clone()),
1975 ..Default::default()
1976 };
1977 // An operation rich enough to trip several negative-probe
1978 // branches: header param (→ missing-header), query param
1979 // (→ missing-query), and a sample body (→ schema mutations
1980 // wouldn't fire without a schema, but uri-too-long always
1981 // does).
1982 let ops = vec![op(
1983 "GET",
1984 "/items",
1985 Some("{}"),
1986 vec![("id", "1")],
1987 vec![("X-Trace", "x")],
1988 vec![],
1989 )];
1990 let _ = run_self_test(&ops, &cfg).await.expect("client builds");
1991 let captures = sink.lock().unwrap();
1992 assert!(!captures.is_empty(), "self-test should record probes");
1993 // For every captured probe, at least one of the default geo
1994 // headers must be present and equal to the configured IP.
1995 let geo_headers: std::collections::HashSet<&str> =
1996 ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"].into_iter().collect();
1997 for c in captures.iter() {
1998 let has_geo = c
1999 .request_headers
2000 .iter()
2001 .any(|(k, v)| geo_headers.contains(k.as_str()) && v == "203.0.113.5");
2002 assert!(
2003 has_geo,
2004 "probe `{}` is missing the geo IP header; got headers: {:?}",
2005 c.label, c.request_headers
2006 );
2007 }
2008 }
2009
2010 /// Round 25 (k) — operations with a JSON request body now get four
2011 /// content-type-swap probes (xml / yaml / multipart / urlencoded).
2012 /// Verify they:
2013 /// 1. fire only when the operation declares a JSON body
2014 /// 2. carry the wrong Content-Type the probe is testing for
2015 /// 3. don't fire on body-less operations
2016 #[tokio::test]
2017 async fn content_type_swap_probes_fire_for_json_bodies() {
2018 let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2019 let cfg = SelfTestConfig {
2020 target_url: "http://127.0.0.1:1".into(),
2021 timeout: Duration::from_millis(50),
2022 capture: Some(sink.clone()),
2023 ..Default::default()
2024 };
2025 let ops = vec![
2026 op("POST", "/users", Some("{\"name\":\"a\"}"), vec![], vec![], vec![]),
2027 op("GET", "/ping", None, vec![], vec![], vec![]),
2028 ];
2029 let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2030 let captures = sink.lock().unwrap();
2031
2032 let swap_labels: Vec<&str> = captures
2033 .iter()
2034 .filter(|c| c.label.starts_with("request-body:content-type-mismatch:"))
2035 .map(|c| c.label.as_str())
2036 .collect();
2037 assert_eq!(
2038 swap_labels.len(),
2039 4,
2040 "expected 4 content-type-swap probes (one per variant), got: {swap_labels:?}"
2041 );
2042 let expected_labels = [
2043 "request-body:content-type-mismatch:xml",
2044 "request-body:content-type-mismatch:yaml",
2045 "request-body:content-type-mismatch:multipart",
2046 "request-body:content-type-mismatch:urlencoded",
2047 ];
2048 for want in expected_labels {
2049 assert!(swap_labels.contains(&want), "missing swap probe `{want}`");
2050 }
2051
2052 // Each swap probe must carry the wrong Content-Type it's
2053 // testing for — that's the whole point.
2054 for c in captures.iter() {
2055 let Some(suffix) = c.label.strip_prefix("request-body:content-type-mismatch:") else {
2056 continue;
2057 };
2058 let want_ct = match suffix {
2059 "xml" => "application/xml",
2060 "yaml" => "application/yaml",
2061 "multipart" => "multipart/form-data",
2062 "urlencoded" => "application/x-www-form-urlencoded",
2063 _ => continue,
2064 };
2065 let got_ct = c
2066 .request_headers
2067 .iter()
2068 .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2069 .map(|(_, v)| v.as_str())
2070 .unwrap_or("");
2071 assert_eq!(got_ct, want_ct, "swap probe `{}` sent wrong CT", c.label);
2072 }
2073
2074 // The body-less operation must NOT produce content-type-swap
2075 // probes (no body → no content type to lie about).
2076 let body_less_swaps = captures
2077 .iter()
2078 .filter(|c| {
2079 c.label.starts_with("request-body:content-type-mismatch:")
2080 && c.url.ends_with("/ping")
2081 })
2082 .count();
2083 assert_eq!(
2084 body_less_swaps, 0,
2085 "GET /ping has no request body; should not produce content-type-swap probes"
2086 );
2087 }
2088
2089 /// Round 27 (k variant b) — Srikanth's round-23 follow-up on (k):
2090 /// JSON envelope with embedded non-JSON field values. For each
2091 /// JSON-body operation, four extra probes fire that send valid
2092 /// JSON with an XML/YAML/multipart/urlencoded snippet stuffed
2093 /// into a string field. Content-Type stays `application/json`;
2094 /// expected is 2xx-3xx (the body parses); a 5xx flags a server
2095 /// that crashed on the embedded content.
2096 #[tokio::test]
2097 async fn embedded_content_probes_fire_with_honest_content_type() {
2098 let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2099 let cfg = SelfTestConfig {
2100 target_url: "http://127.0.0.1:1".into(),
2101 timeout: Duration::from_millis(50),
2102 capture: Some(sink.clone()),
2103 ..Default::default()
2104 };
2105 let ops = vec![op(
2106 "POST",
2107 "/users",
2108 Some("{\"name\":\"alice\",\"age\":30}"),
2109 vec![],
2110 vec![],
2111 vec![],
2112 )];
2113 let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2114 let captures = sink.lock().unwrap();
2115 let embedded: Vec<&CaseCapture> = captures
2116 .iter()
2117 .filter(|c| c.label.starts_with("request-body:embedded-content:"))
2118 .collect();
2119 assert_eq!(
2120 embedded.len(),
2121 4,
2122 "expected 4 embedded-content probes, got: {:?}",
2123 embedded.iter().map(|c| &c.label).collect::<Vec<_>>()
2124 );
2125 // Every embedded probe must carry the honest application/json
2126 // Content-Type (NOT lie like the variant-a content-type-swap
2127 // probes do) and a request body that still parses as JSON.
2128 for c in &embedded {
2129 let ct = c
2130 .request_headers
2131 .iter()
2132 .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2133 .map(|(_, v)| v.as_str())
2134 .unwrap_or("");
2135 assert!(
2136 ct.contains("application/json"),
2137 "embedded probe `{}` should keep Content-Type honest, got {ct}",
2138 c.label
2139 );
2140 let body = c.request_body.as_deref().unwrap_or("");
2141 assert!(
2142 serde_json::from_str::<serde_json::Value>(body).is_ok(),
2143 "embedded probe `{}` body should still be valid JSON, got: {body}",
2144 c.label
2145 );
2146 }
2147 }
2148
/// `embed_payload_in_first_string_field` is expected to swap out only
/// the FIRST string-valued leaf it meets (depth-first) and to leave
/// every other part of the document as it was.
#[test]
fn embed_payload_replaces_first_string_only() {
    use serde_json::json;

    let original = r#"{"name":"alice","age":30,"tags":["admin","user"]}"#;
    let rewritten = embed_payload_in_first_string_field(original, "<x/>");
    let doc: serde_json::Value = serde_json::from_str(&rewritten).unwrap();

    // The first string leaf now holds the payload.
    assert_eq!(doc["name"], json!("<x/>"));
    // The integer is still an integer; the mutation did not stringify it.
    assert_eq!(doc["age"], json!(30));
    // Strings later in the document (inside `tags`) are untouched.
    assert_eq!(doc["tags"][0], json!("admin"));
    assert_eq!(doc["tags"][1], json!("user"));
}
2165
/// A sample without any string field makes the helper fall back to
/// `{"data": "<snippet>"}` so the probe still has a body to send. The
/// output has to be valid JSON whatever characters the snippet holds.
#[test]
fn embed_payload_falls_back_when_no_string_field() {
    let snippet = "<x><y></y></x>";
    let rewritten = embed_payload_in_first_string_field(r#"{"a":1,"b":[2,3]}"#, snippet);
    let doc: serde_json::Value = serde_json::from_str(&rewritten).unwrap();
    assert_eq!(doc["data"], serde_json::json!("<x><y></y></x>"));
}
2177
/// A sample that is not JSON at all still produces a valid JSON
/// document with the payload stored under `data`.
#[test]
fn embed_payload_handles_invalid_json_sample() {
    let rewritten = embed_payload_in_first_string_field("garbage", "a=1&b=2");
    let doc: serde_json::Value = serde_json::from_str(&rewritten).unwrap();
    assert_eq!(doc["data"], serde_json::json!("a=1&b=2"));
}
2185
/// Round 26 — a 0.3.169 capture for the vCenter
/// `infraprofile/configs` 202 response showed `at /: Type { kind:
/// Single`: the spec promised `type: string`, the server returned a
/// JSON object, and the message was a broken-syntax debug string.
/// Reproduces that spec/body pair and checks the message is readable.
#[test]
fn response_schema_error_message_is_readable() {
    let string_schema = serde_json::json!({"type": "string"});
    let object_body = r#"{"data":{},"id":"generated_id","status":"created"}"#;
    let err =
        validate_body_against_schema(object_body, &string_schema).expect("type-mismatch fires");

    // No leftovers of Rust debug formatting may leak into the text.
    for debug_token in ["Type { kind", "{ kind:"] {
        assert!(!err.contains(debug_token), "stale debug output: {err}");
    }
    // The expected type has to be named.
    assert!(err.contains("string"), "should name expected type: {err}");

    // Round 29 — on 0.3.172 `at /:` was mistaken for the URL path.
    // The message now says "response body root" for the root case
    // (and "response body at /<pointer>" for nested fields).
    assert!(
        err.contains("response body root"),
        "should label root explicitly so reader knows it's not the URL: {err}"
    );

    // Round 28 — the expected schema is embedded in the message, so
    // it reads as 'expected schema {"type":"string"}'.
    let names_schema = err.contains("expected schema") && err.contains("\"type\":\"string\"");
    assert!(names_schema, "should include expected schema JSON: {err}");
}
2218
/// Round 29 — below the root the message reads
/// "response body at /name: ...". Guards against the root rewording
/// accidentally dropping the JSON pointer for nested fields.
#[test]
fn response_schema_error_uses_response_body_prefix_for_nested_paths() {
    let object_schema = serde_json::json!({
        "type": "object",
        "required": ["name"],
        "properties": {"name": {"type": "string"}}
    });
    // `name` is a number where the schema demands a string.
    let err = validate_body_against_schema(r#"{"name": 123}"#, &object_schema)
        .expect("type-mismatch fires");

    assert!(
        err.contains("response body at /name"),
        "nested path should read 'response body at /name': {err}"
    );
    assert!(!err.contains("response body root"), "wrong label for nested: {err}");

    // Round 30 — the "expected schema" suffix is the sub-schema at
    // /name, not the whole object schema, so the reader doesn't have
    // to scan a long object to find the constraint that failed.
    assert!(
        err.contains(r#"expected schema {"type":"string"}"#),
        "should show only the /name sub-schema, not the full object: {err}"
    );
}
2246
/// Round 30 — shows how a mismatch two levels down reads. The schema
/// wants `name.type` to be a string and the body supplies a number,
/// so the JSON pointer is `/name/type`.
#[test]
fn response_schema_error_uses_response_body_prefix_for_deep_nested_paths() {
    let nested_schema = serde_json::json!({
        "type": "object",
        "properties": {
            "name": {
                "type": "object",
                "properties": {"type": {"type": "string"}}
            }
        }
    });
    let err = validate_body_against_schema(r#"{"name": {"type": 123}}"#, &nested_schema)
        .expect("type-mismatch fires");

    assert!(
        err.contains("response body at /name/type"),
        "deep nested path should read 'response body at /name/type': {err}"
    );
    // Round 30 — for deep paths the reported sub-schema is the leaf
    // {"type":"string"}, not any of the enclosing object schemas.
    assert!(
        err.contains(r#"expected schema {"type":"string"}"#),
        "should show only the /name/type leaf sub-schema: {err}"
    );
}
2274
/// Round 30 — when an instance pointer cannot be followed through the
/// schema's `properties` chain (e.g. an additionalProperties hit),
/// `sub_schema_at_pointer` returns `None`; the error message is then
/// meant to fall back to the full schema. Also covers the root
/// pointers, which resolve to the schema itself.
#[test]
fn sub_schema_at_pointer_falls_back_for_unresolvable_paths() {
    let schema = serde_json::json!({"type":"object","additionalProperties":true});

    // `/unknown` has no `properties` entry, so the walker yields
    // `None`; substituting the full schema is left to the caller.
    assert_eq!(
        sub_schema_at_pointer(&schema, "/unknown"),
        None,
        "unresolvable path should return None to trigger fallback"
    );

    // Both spellings of the root pointer return the whole schema.
    for root_pointer in ["/", ""] {
        assert_eq!(sub_schema_at_pointer(&schema, root_pointer), Some(schema.clone()));
    }
}
2292
/// A body lacking a required property must produce a message that
/// says a required field is missing and names the field.
#[test]
fn response_schema_error_required_field_is_readable() {
    let needs_id = serde_json::json!({
        "type": "object",
        "required": ["id"],
        "properties": {"id": {"type": "integer"}}
    });
    let err = validate_body_against_schema(r#"{"other": 1}"#, &needs_id)
        .expect("required-missing fires");
    for fragment in ["required field missing", "id"] {
        assert!(err.contains(fragment), "{err}");
    }
}
2305
/// A body that satisfies the schema yields no error message at all.
#[test]
fn response_schema_error_none_on_match() {
    let string_schema = serde_json::json!({"type": "string"});
    let verdict = validate_body_against_schema("\"hello\"", &string_schema);
    assert_eq!(verdict, None);
}
2311
/// The report serialises to JSON via serde with its counters and its
/// nested per-operation outcomes reachable under their field names.
#[test]
fn json_serialises_report() {
    let get_x = OperationResult {
        method: "GET".into(),
        path: "/x".into(),
        positive: Some(CaseOutcome {
            label: "positive".into(),
            expected_4xx: false,
            actual_status: 200,
            passed: true,
        }),
        negatives: Vec::new(),
    };
    let report = SelfTestReport {
        positive_pass: 1,
        positive_fail: 0,
        negative_caught: BTreeMap::new(),
        negative_missed: BTreeMap::new(),
        operations: vec![get_x],
    };

    let json = serde_json::to_value(&report).expect("serialises");
    assert_eq!(json["positive_pass"], serde_json::json!(1));
    // Walk operations[0].positive.actual_status with a JSON pointer.
    assert_eq!(
        json.pointer("/operations/0/positive/actual_status"),
        Some(&serde_json::json!(200))
    );
}
2335}