Skip to main content

mockforge_bench/conformance/
request_validator.rs

1//! Request validation against OpenAPI spec.
2//!
3//! Validates that conformance test requests (especially from HAR custom checks)
4//! conform to the OpenAPI specification: correct paths, required parameters,
5//! valid request body schemas, and matching content types.
6
7use crate::error::Result;
8use crate::spec_parser::SpecParser;
9use openapiv3::{OpenAPI, ReferenceOr};
10use serde::Serialize;
11use std::collections::HashMap;
12use std::path::Path;
13
14use super::custom::CustomConformanceConfig;
15
16/// A single request validation violation
17#[derive(Debug, Serialize)]
18pub struct RequestViolation {
19    /// Check name from the custom YAML
20    pub check_name: String,
21    /// Request method
22    pub method: String,
23    /// Request path
24    pub path: String,
25    /// Type of violation
26    pub violation_type: String,
27    /// Human-readable description
28    pub message: String,
29}
30
31/// Validate custom conformance checks against an OpenAPI spec.
32///
33/// Returns a list of violations (empty if all checks are valid).
34pub fn validate_custom_checks(
35    spec: &OpenAPI,
36    custom_checks_file: &Path,
37    base_path: Option<&str>,
38) -> Result<Vec<RequestViolation>> {
39    let config = CustomConformanceConfig::from_file(custom_checks_file)?;
40    let mut violations = Vec::new();
41
42    // Build a map of spec paths -> operations for matching
43    let spec_ops = build_spec_operation_map(spec);
44
45    for check in &config.custom_checks {
46        // Strip query string from path for matching
47        let check_path = check.path.split('?').next().unwrap_or(&check.path);
48
49        // Try to match the check's path to a spec operation
50        let spec_path = match find_matching_spec_path(check_path, &spec_ops, base_path) {
51            Some(p) => p,
52            None => {
53                violations.push(RequestViolation {
54                    check_name: check.name.clone(),
55                    method: check.method.clone(),
56                    path: check.path.clone(),
57                    violation_type: "unknown_path".to_string(),
58                    message: format!(
59                        "Path '{}' not found in OpenAPI spec (checked with base_path={:?})",
60                        check_path, base_path
61                    ),
62                });
63                continue;
64            }
65        };
66
67        // Check if the method is defined for this path
68        let path_item = match spec.paths.paths.get(&spec_path) {
69            Some(ReferenceOr::Item(item)) => item,
70            _ => continue,
71        };
72
73        let method_lower = check.method.to_lowercase();
74        let operation = match method_lower.as_str() {
75            "get" => path_item.get.as_ref(),
76            "post" => path_item.post.as_ref(),
77            "put" => path_item.put.as_ref(),
78            "delete" => path_item.delete.as_ref(),
79            "patch" => path_item.patch.as_ref(),
80            "head" => path_item.head.as_ref(),
81            "options" => path_item.options.as_ref(),
82            _ => None,
83        };
84
85        let operation = match operation {
86            Some(op) => op,
87            None => {
88                violations.push(RequestViolation {
89                    check_name: check.name.clone(),
90                    method: check.method.clone(),
91                    path: check.path.clone(),
92                    violation_type: "method_not_allowed".to_string(),
93                    message: format!(
94                        "Method '{}' not defined for path '{}' in the spec",
95                        check.method, spec_path
96                    ),
97                });
98                continue;
99            }
100        };
101
102        // Validate request body for POST/PUT/PATCH
103        if matches!(method_lower.as_str(), "post" | "put" | "patch") {
104            validate_request_body(
105                &check.name,
106                &check.method,
107                &check.path,
108                check.body.as_deref(),
109                operation,
110                spec,
111                &mut violations,
112            );
113        }
114
115        // Check required parameters
116        validate_parameters(
117            &check.name,
118            &check.method,
119            &check.path,
120            check_path,
121            &check.headers,
122            operation,
123            path_item,
124            spec,
125            &mut violations,
126        );
127    }
128
129    Ok(violations)
130}
131
132/// Collected spec operations indexed by path
133type SpecOperationMap = HashMap<String, Vec<String>>; // path -> [methods]
134
135fn build_spec_operation_map(spec: &OpenAPI) -> SpecOperationMap {
136    let mut map = HashMap::new();
137    for (path, item_ref) in &spec.paths.paths {
138        if let ReferenceOr::Item(item) = item_ref {
139            let mut methods = Vec::new();
140            if item.get.is_some() {
141                methods.push("GET".to_string());
142            }
143            if item.post.is_some() {
144                methods.push("POST".to_string());
145            }
146            if item.put.is_some() {
147                methods.push("PUT".to_string());
148            }
149            if item.delete.is_some() {
150                methods.push("DELETE".to_string());
151            }
152            if item.patch.is_some() {
153                methods.push("PATCH".to_string());
154            }
155            if item.head.is_some() {
156                methods.push("HEAD".to_string());
157            }
158            if item.options.is_some() {
159                methods.push("OPTIONS".to_string());
160            }
161            map.insert(path.clone(), methods);
162        }
163    }
164    map
165}
166
167/// Try to match a concrete path (e.g., "/users/123") to a spec path template
168/// (e.g., "/users/{id}"). Handles base_path stripping.
169fn find_matching_spec_path(
170    check_path: &str,
171    spec_ops: &SpecOperationMap,
172    base_path: Option<&str>,
173) -> Option<String> {
174    // Try exact match first
175    if spec_ops.contains_key(check_path) {
176        return Some(check_path.to_string());
177    }
178
179    // Try with base_path prepended
180    if let Some(bp) = base_path {
181        let with_base = format!("{}{}", bp.trim_end_matches('/'), check_path);
182        if spec_ops.contains_key(&with_base) {
183            return Some(with_base);
184        }
185    }
186
187    // Try template matching (e.g., /users/123 matches /users/{id})
188    for spec_path in spec_ops.keys() {
189        if path_matches_template(check_path, spec_path)
190            || base_path
191                .map(|bp| {
192                    let with_base = format!("{}{}", bp.trim_end_matches('/'), check_path);
193                    path_matches_template(&with_base, spec_path)
194                })
195                .unwrap_or(false)
196        {
197            return Some(spec_path.clone());
198        }
199    }
200
201    None
202}
203
/// Check if a concrete path matches a path template with `{param}` segments.
///
/// Both strings are split on `/` and compared segment by segment:
/// - a template segment of the form `{...}` matches any **non-empty**
///   concrete segment (an empty segment such as the tail of `/users/` does
///   not bind a path parameter);
/// - any other template segment must equal the concrete segment exactly.
///
/// The two paths must have the same number of segments.
fn path_matches_template(concrete: &str, template: &str) -> bool {
    let mut concrete_parts = concrete.split('/');
    let mut template_parts = template.split('/');

    loop {
        match (concrete_parts.next(), template_parts.next()) {
            // Both exhausted at the same time: every segment matched.
            (None, None) => return true,
            (Some(actual), Some(expected)) => {
                let is_param = expected.starts_with('{') && expected.ends_with('}');
                let segment_ok = if is_param {
                    !actual.is_empty()
                } else {
                    actual == expected
                };
                if !segment_ok {
                    return false;
                }
            }
            // Different segment counts.
            _ => return false,
        }
    }
}
218
219/// Validate request body against the spec's requestBody schema
220#[allow(clippy::too_many_arguments)]
221fn validate_request_body(
222    check_name: &str,
223    method: &str,
224    path: &str,
225    body: Option<&str>,
226    operation: &openapiv3::Operation,
227    spec: &OpenAPI,
228    violations: &mut Vec<RequestViolation>,
229) {
230    let request_body_ref = match &operation.request_body {
231        Some(rb) => rb,
232        None => {
233            // Spec doesn't define a requestBody — body is optional
234            return;
235        }
236    };
237
238    // Resolve $ref if needed
239    let request_body = match request_body_ref {
240        ReferenceOr::Item(rb) => rb,
241        ReferenceOr::Reference { reference } => {
242            let name = reference.strip_prefix("#/components/requestBodies/").unwrap_or(reference);
243            match spec.components.as_ref().and_then(|c| c.request_bodies.get(name)) {
244                Some(ReferenceOr::Item(rb)) => rb,
245                _ => return,
246            }
247        }
248    };
249
250    // Check if body is required but missing
251    if request_body.required && body.is_none() {
252        violations.push(RequestViolation {
253            check_name: check_name.to_string(),
254            method: method.to_string(),
255            path: path.to_string(),
256            violation_type: "missing_required_body".to_string(),
257            message: "Spec requires a request body but none is provided in the check".to_string(),
258        });
259        return;
260    }
261
262    // If body is provided, validate against schema
263    if let Some(body_str) = body {
264        // Find JSON content type
265        let json_media = request_body.content.get("application/json").or_else(|| {
266            request_body.content.iter().find(|(k, _)| k.contains("json")).map(|(_, v)| v)
267        });
268
269        if let Some(media) = json_media {
270            if let Some(schema_ref) = &media.schema {
271                // Resolve the immediate $ref (one level) to get the
272                // root schema, then hand both schema + spec to the
273                // ref-resolver helper so nested `$ref` strings (e.g.
274                // `#/components/schemas/Vcenter.VM.DiskCloneSpec`)
275                // resolve against the full document context.
276                //
277                // Round 18.3 — pre-fix this called
278                // `jsonschema::validator_for(&schema_json)` directly,
279                // which used the inner schema as the validator's
280                // document. Nested $refs to `#/components/schemas/X`
281                // then failed with "Pointer '...' does not exist"
282                // because the validator's document had no
283                // `components` key (Srikanth's vCenter run: 157
284                // violations).
285                let root_schema = match schema_ref {
286                    ReferenceOr::Item(s) => s.clone(),
287                    ReferenceOr::Reference { reference } => {
288                        let name =
289                            reference.strip_prefix("#/components/schemas/").unwrap_or(reference);
290                        match spec.components.as_ref().and_then(|c| c.schemas.get(name)) {
291                            Some(ReferenceOr::Item(s)) => s.clone(),
292                            _ => return,
293                        }
294                    }
295                };
296
297                // Parse body as JSON and validate against schema
298                match serde_json::from_str::<serde_json::Value>(body_str) {
299                    Ok(body_value) => {
300                        match mockforge_openapi::schema_ref_resolver::build_validator(
301                            &root_schema,
302                            spec,
303                        ) {
304                            Ok(validator) => {
305                                let errors: Vec<_> = validator.iter_errors(&body_value).collect();
306                                for err in errors.iter().take(5) {
307                                    violations.push(RequestViolation {
308                                        check_name: check_name.to_string(),
309                                        method: method.to_string(),
310                                        path: path.to_string(),
311                                        violation_type: "body_schema_violation".to_string(),
312                                        message: format!(
313                                            "Request body schema violation at {}: {}",
314                                            err.instance_path, err
315                                        ),
316                                    });
317                                }
318                            }
319                            Err(_) => {
320                                // Schema itself is invalid — skip validation
321                            }
322                        }
323                    }
324                    Err(e) => {
325                        violations.push(RequestViolation {
326                            check_name: check_name.to_string(),
327                            method: method.to_string(),
328                            path: path.to_string(),
329                            violation_type: "body_not_json".to_string(),
330                            message: format!("Request body is not valid JSON: {}", e),
331                        });
332                    }
333                }
334            }
335        }
336    }
337}
338
339/// Validate required parameters from the spec
340#[allow(clippy::too_many_arguments)]
341fn validate_parameters(
342    check_name: &str,
343    method: &str,
344    path: &str,
345    check_path_no_query: &str,
346    check_headers: &HashMap<String, String>,
347    operation: &openapiv3::Operation,
348    path_item: &openapiv3::PathItem,
349    spec: &OpenAPI,
350    violations: &mut Vec<RequestViolation>,
351) {
352    // Collect all parameters (path-level + operation-level)
353    let mut all_params = Vec::new();
354    for p in &path_item.parameters {
355        if let Some(param) = resolve_parameter(p, spec) {
356            all_params.push(param);
357        }
358    }
359    for p in &operation.parameters {
360        if let Some(param) = resolve_parameter(p, spec) {
361            all_params.push(param);
362        }
363    }
364
365    for param in &all_params {
366        let param_data = match param {
367            openapiv3::Parameter::Query { parameter_data, .. } => {
368                if !parameter_data.required {
369                    continue;
370                }
371                // Check if query param is in the path's query string
372                let has_param = check_path_no_query != path
373                    && path.contains(&format!("{}=", parameter_data.name));
374                if !has_param {
375                    violations.push(RequestViolation {
376                        check_name: check_name.to_string(),
377                        method: method.to_string(),
378                        path: path.to_string(),
379                        violation_type: "missing_required_query_param".to_string(),
380                        message: format!(
381                            "Required query parameter '{}' is missing",
382                            parameter_data.name
383                        ),
384                    });
385                }
386                continue;
387            }
388            openapiv3::Parameter::Header { parameter_data, .. } => parameter_data,
389            openapiv3::Parameter::Path { parameter_data, .. } => {
390                // Path params are always required — but they're embedded in the URL
391                // so we can't easily validate them here (they're already resolved)
392                let _ = parameter_data;
393                continue;
394            }
395            openapiv3::Parameter::Cookie { .. } => continue,
396        };
397
398        if param_data.required {
399            let has_header = check_headers.keys().any(|k| k.eq_ignore_ascii_case(&param_data.name));
400            if !has_header {
401                violations.push(RequestViolation {
402                    check_name: check_name.to_string(),
403                    method: method.to_string(),
404                    path: path.to_string(),
405                    violation_type: "missing_required_header".to_string(),
406                    message: format!("Required header parameter '{}' is missing", param_data.name),
407                });
408            }
409        }
410    }
411}
412
413/// Resolve a parameter reference
414fn resolve_parameter<'a>(
415    param_ref: &'a ReferenceOr<openapiv3::Parameter>,
416    spec: &'a OpenAPI,
417) -> Option<&'a openapiv3::Parameter> {
418    match param_ref {
419        ReferenceOr::Item(p) => Some(p),
420        ReferenceOr::Reference { reference } => {
421            let name = reference.strip_prefix("#/components/parameters/")?;
422            match spec.components.as_ref()?.parameters.get(name)? {
423                ReferenceOr::Item(p) => Some(p),
424                _ => None,
425            }
426        }
427    }
428}
429
430/// Resolve a schema reference to a serde_json::Value for validation.
431/// Reserved for round 21.3 (response-body shape validation against the
432/// spec's response schema). Not yet wired into a call site.
433#[allow(dead_code)]
434fn resolve_schema_to_json(
435    schema_ref: &ReferenceOr<openapiv3::Schema>,
436    spec: &OpenAPI,
437) -> Option<serde_json::Value> {
438    let schema = match schema_ref {
439        ReferenceOr::Item(s) => s,
440        ReferenceOr::Reference { reference } => {
441            let name = reference.strip_prefix("#/components/schemas/")?;
442            match spec.components.as_ref()?.schemas.get(name)? {
443                ReferenceOr::Item(s) => s,
444                _ => return None,
445            }
446        }
447    };
448    serde_json::to_value(schema).ok()
449}
450
451/// Run request validation and write results to a file.
452/// Called from the conformance execution path.
453pub async fn run_request_validation(
454    spec_files: &[std::path::PathBuf],
455    custom_checks_file: Option<&Path>,
456    base_path: Option<&str>,
457    output_dir: &Path,
458) -> Result<usize> {
459    let custom_file = match custom_checks_file {
460        Some(f) => f,
461        None => return Ok(0),
462    };
463
464    if spec_files.is_empty() {
465        return Ok(0);
466    }
467
468    let parser = SpecParser::from_file(&spec_files[0]).await?;
469    let spec = parser.spec();
470
471    let violations = validate_custom_checks(spec, custom_file, base_path)?;
472
473    if !violations.is_empty() {
474        let path = output_dir.join("conformance-request-violations.json");
475        if let Ok(json) = serde_json::to_string_pretty(&violations) {
476            let _ = std::fs::write(&path, json);
477            tracing::info!(
478                "Found {} request validation violation(s), saved to {}",
479                violations.len(),
480                path.display()
481            );
482        }
483    }
484
485    Ok(violations.len())
486}
487
488/// Round 44 (#79) — validate each emitted request retrospectively
489/// against the OpenAPI spec, after the bench run completes. Reads
490/// `conformance-requests.json` (which `--export-requests` writes) and
491/// emits one [`RequestViolation`] entry per actual wire-level
492/// rule break (enum, type, required field, etc.), so a user can see
493/// the client's own view of what it sent that violated the contract
494/// without having to query the server's `/__mockforge/api/conformance/violations`.
495///
496/// Srikanth on 0.3.188: "Any reason why validate-requests in mockforge
497/// client are not catching all this query param or body params or path
498/// params violation issues and record in conformance-request-failure
499/// logs?" The existing `validate_custom_checks` only looks at the YAML
500/// shape at config time (missing required params, unknown path);
501/// auto-generated self-test probes ARE intentionally invalid but were
502/// never recorded client-side because they don't come from the YAML.
503/// This function complements the YAML-shape pass by checking each
504/// emitted request against the spec's actual rule set.
505///
506/// Appends to (not overwrites) `conformance-request-violations.json`
507/// when YAML-shape violations were already written above, so a single
508/// file holds both views.
509pub async fn validate_emitted_requests(
510    spec_files: &[std::path::PathBuf],
511    output_dir: &Path,
512) -> Result<usize> {
513    validate_emitted_requests_with_base_path(spec_files, output_dir, None).await
514}
515
516/// Round 45 (#79) — same as `validate_emitted_requests` but accepts an
517/// explicit `base_path` (e.g. Srikanth's `--base-path /api` for the
518/// Apigee spec where every operation lives under `/api/v1/...` on the
519/// wire but `/v1/...` in the spec). Without it the emitted URL doesn't
520/// match the spec path and every request silently skips validation.
521///
522/// Also broadened in r45 to:
523/// - extract path params from the URL and validate their values
524///   against the spec's path-parameter schemas (enum / type)
525/// - parse the request body when content-type is JSON and walk it
526///   against the requestBody schema's `required: [...]` and enum
527///   constraints on top-level properties
528///
529/// Body and path-param coverage is INTENTIONALLY shallow (top-level
530/// `required` + `enum`/`type` on direct properties only) — the
531/// authoritative validator is the OpenAPI server's; this is the
532/// client-side cross-check that mirrors the server's view on the
533/// wire-level requests the bench actually sent.
534pub async fn validate_emitted_requests_with_base_path(
535    spec_files: &[std::path::PathBuf],
536    output_dir: &Path,
537    base_path: Option<&str>,
538) -> Result<usize> {
539    use serde_json::Value;
540
541    if spec_files.is_empty() {
542        return Ok(0);
543    }
544    let requests_path = output_dir.join("conformance-requests.json");
545    let self_test_jsonl_path = output_dir.join("conformance-self-test-requests.jsonl");
546
547    // Round 49 (#79) — Srikanth on 0.3.193: self-test + --targets-file
548    // produced no violation logs because validate_emitted_requests
549    // only reads `conformance-requests.json` (the bench export
550    // shape), and self-test writes `conformance-self-test-
551    // requests.jsonl` (the CaseCapture shape). Now read whichever
552    // exists, converting the JSONL shape into the same `{check,
553    // method, url, request.body}` structure the validator below
554    // expects. If both exist, the bench export wins (a deliberate
555    // bench run shouldn't be overridden by stale self-test output).
556    let entries: Vec<Value> = if requests_path.exists() {
557        let bytes = match std::fs::read(&requests_path) {
558            Ok(b) => b,
559            Err(_) => return Ok(0),
560        };
561        match serde_json::from_slice(&bytes) {
562            Ok(v) => v,
563            Err(_) => return Ok(0),
564        }
565    } else if self_test_jsonl_path.exists() {
566        let bytes = match std::fs::read(&self_test_jsonl_path) {
567            Ok(b) => b,
568            Err(_) => return Ok(0),
569        };
570        let text = String::from_utf8_lossy(&bytes);
571        text.lines()
572            .filter(|l| !l.is_empty())
573            .filter_map(|l| serde_json::from_str::<Value>(l).ok())
574            .map(|case| {
575                let label = case.get("label").and_then(|v| v.as_str()).unwrap_or("").to_string();
576                let method = case.get("method").and_then(|v| v.as_str()).unwrap_or("").to_string();
577                let url = case.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string();
578                let body = case.get("request_body").cloned().unwrap_or(Value::Null);
579                let mut req = serde_json::Map::new();
580                req.insert("method".into(), Value::String(method));
581                req.insert("url".into(), Value::String(url));
582                req.insert(
583                    "body".into(),
584                    match body {
585                        Value::String(s) => Value::String(s),
586                        Value::Null => Value::String(String::new()),
587                        other => other,
588                    },
589                );
590                let mut out = serde_json::Map::new();
591                out.insert("check".into(), Value::String(label));
592                out.insert("request".into(), Value::Object(req));
593                Value::Object(out)
594            })
595            .collect()
596    } else {
597        return Ok(0);
598    };
599    if entries.is_empty() {
600        return Ok(0);
601    }
602
603    let parser = SpecParser::from_file(&spec_files[0]).await?;
604    let spec = parser.spec();
605    let spec_ops = build_spec_operation_map(spec);
606
607    let mut emitted_violations: Vec<RequestViolation> = Vec::new();
608
609    for entry in &entries {
610        let check = entry.get("check").and_then(|v| v.as_str()).unwrap_or("").to_string();
611        let req = match entry.get("request") {
612            Some(r) => r,
613            None => continue,
614        };
615        let method = req.get("method").and_then(|v| v.as_str()).unwrap_or("").to_uppercase();
616        let url = req.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string();
617        if method.is_empty() || url.is_empty() {
618            continue;
619        }
620        let (path_only, query_string) = match url.find('?') {
621            Some(i) => (url[..i].to_string(), url[i + 1..].to_string()),
622            None => (url.clone(), String::new()),
623        };
624        // Trim scheme + host from path so we match spec paths cleanly.
625        // "http://host:port/api/x" → "/api/x".
626        let path_only = if let Some(stripped) = path_only.split_once("://") {
627            match stripped.1.find('/') {
628                Some(i) => stripped.1[i..].to_string(),
629                None => "/".to_string(),
630            }
631        } else {
632            path_only
633        };
634
635        // Round 45 — strip base_path BEFORE matching so an Apigee-style
636        // `/api/v1/organizations` on the wire matches `/v1/organizations`
637        // in the spec when `--base-path /api` was passed.
638        let lookup_path = if let Some(bp) = base_path {
639            let bp = bp.trim_end_matches('/');
640            if !bp.is_empty() && path_only.starts_with(bp) {
641                let stripped = &path_only[bp.len()..];
642                if stripped.is_empty() {
643                    "/".to_string()
644                } else {
645                    stripped.to_string()
646                }
647            } else {
648                path_only.clone()
649            }
650        } else {
651            path_only.clone()
652        };
653
654        let spec_path = match find_matching_spec_path(&lookup_path, &spec_ops, None) {
655            Some(p) => p,
656            None => continue,
657        };
658        let path_item = match spec.paths.paths.get(&spec_path) {
659            Some(ReferenceOr::Item(item)) => item,
660            _ => continue,
661        };
662        let operation = match method.as_str() {
663            "GET" => path_item.get.as_ref(),
664            "POST" => path_item.post.as_ref(),
665            "PUT" => path_item.put.as_ref(),
666            "DELETE" => path_item.delete.as_ref(),
667            "PATCH" => path_item.patch.as_ref(),
668            "HEAD" => path_item.head.as_ref(),
669            "OPTIONS" => path_item.options.as_ref(),
670            _ => None,
671        };
672        let Some(operation) = operation else { continue };
673
674        // Inspect query parameters declared on this operation; for each
675        // sent query field, check it against the parameter's schema enum
676        // and type. This is what catches Srikanth's `?$.xgafv=test-value`
677        // case where the value isn't `"1"` or `"2"`.
678        let sent_query: HashMap<String, String> = query_string
679            .split('&')
680            .filter_map(|kv| {
681                let mut it = kv.splitn(2, '=');
682                let k = it.next()?.to_string();
683                let v = it.next().unwrap_or("").to_string();
684                if k.is_empty() {
685                    None
686                } else {
687                    Some((k, v))
688                }
689            })
690            .collect();
691
692        // Round 45 — bind path parameters by zipping the concrete URL
693        // path against the spec's template path. `/v1/{name}` ←
694        // `/v1/projects/abc` produces `{ "name": "projects/abc" }`.
695        // Used below to value-check each path-param against its
696        // declared schema (enum / type).
697        let path_params: HashMap<String, String> = {
698            let mut out = HashMap::new();
699            let concrete_parts: Vec<&str> = lookup_path.split('/').collect();
700            let template_parts: Vec<&str> = spec_path.split('/').collect();
701            if concrete_parts.len() == template_parts.len() {
702                for (c, t) in concrete_parts.iter().zip(template_parts.iter()) {
703                    if t.starts_with('{') && t.ends_with('}') {
704                        let name = &t[1..t.len() - 1];
705                        out.insert(name.to_string(), (*c).to_string());
706                    }
707                }
708            }
709            out
710        };
711
712        let mut all_params: Vec<&openapiv3::Parameter> = Vec::new();
713        for p in &path_item.parameters {
714            if let Some(param) = resolve_parameter(p, spec) {
715                all_params.push(param);
716            }
717        }
718        for p in &operation.parameters {
719            if let Some(param) = resolve_parameter(p, spec) {
720                all_params.push(param);
721            }
722        }
723
724        for param in &all_params {
725            let (loc_str, name, schema_ref) = match param {
726                openapiv3::Parameter::Query { parameter_data, .. } => {
727                    let openapiv3::ParameterSchemaOrContent::Schema(sref) = &parameter_data.format
728                    else {
729                        continue;
730                    };
731                    let Some(v) = sent_query.get(&parameter_data.name) else {
732                        continue;
733                    };
734                    ("query", &parameter_data.name, (sref, v.clone()))
735                }
736                openapiv3::Parameter::Path { parameter_data, .. } => {
737                    let openapiv3::ParameterSchemaOrContent::Schema(sref) = &parameter_data.format
738                    else {
739                        continue;
740                    };
741                    let Some(v) = path_params.get(&parameter_data.name) else {
742                        continue;
743                    };
744                    ("path", &parameter_data.name, (sref, v.clone()))
745                }
746                _ => continue,
747            };
748            let (schema_ref, value) = schema_ref;
749            let Some(schema) = schema_ref.as_item() else {
750                continue;
751            };
752            if let Some(msg) = check_value_against_schema(&value, schema) {
753                emitted_violations.push(RequestViolation {
754                    check_name: check.clone(),
755                    method: method.clone(),
756                    path: url.clone(),
757                    violation_type: format!("{}_value_mismatch", loc_str),
758                    message: format!("{}.{}: {}", loc_str, name, msg),
759                });
760            }
761        }
762
763        // Round 45 — request-body cross-check. Only kicks in when the
764        // sent body parses as JSON and the operation declares a JSON
765        // requestBody schema. Shallow: missing required top-level
766        // fields + enum/type mismatches on direct properties. Deeper
767        // schema walks (nested objects, oneOf/anyOf) are the server-
768        // side validator's job; we just want to surface the obvious
769        // wire-level breaks the bench actually fired.
770        let body_str = req.get("body").and_then(|v| v.as_str()).unwrap_or("");
771        if !body_str.is_empty() {
772            if let Ok(body_json) = serde_json::from_str::<serde_json::Value>(body_str) {
773                if let Some(req_body) = operation.request_body.as_ref().and_then(|r| r.as_item()) {
774                    for (ct, media) in &req_body.content {
775                        if !ct.contains("json") {
776                            continue;
777                        }
778                        let Some(schema_ref) = &media.schema else {
779                            continue;
780                        };
781                        let Some(schema) = schema_ref.as_item() else {
782                            continue;
783                        };
784                        check_body_against_schema(
785                            &check,
786                            &method,
787                            &url,
788                            &body_json,
789                            schema,
790                            &mut emitted_violations,
791                        );
792                    }
793                }
794            }
795        }
796    }
797
798    // Merge with any pre-existing custom-YAML violations on disk.
799    let dst = output_dir.join("conformance-request-violations.json");
800    let mut all: Vec<Value> = if dst.exists() {
801        match std::fs::read(&dst) {
802            Ok(b) => serde_json::from_slice(&b).unwrap_or_default(),
803            Err(_) => Vec::new(),
804        }
805    } else {
806        Vec::new()
807    };
808    for v in &emitted_violations {
809        if let Ok(val) = serde_json::to_value(v) {
810            all.push(val);
811        }
812    }
813    // Round 50 (#79) — dedup byte-identical violations. A multi-iteration
814    // self-test captures one probe per iteration, so a 22x duration run
815    // produced 22 copies of every violation in the flat file (and, before
816    // the grouping fixes below, 22 copies inside each grouped row). Keep
817    // the first occurrence of each (check_name, method, path,
818    // violation_type, message) tuple; re-runs that merged the on-disk file
819    // are collapsed too. Preserves first-seen order.
820    {
821        let mut seen: std::collections::HashSet<(String, String, String, String, String)> =
822            std::collections::HashSet::new();
823        all.retain(|v| {
824            let f = |k: &str| v.get(k).and_then(|x| x.as_str()).unwrap_or("").to_string();
825            seen.insert((
826                f("check_name"),
827                f("method"),
828                f("path"),
829                f("violation_type"),
830                f("message"),
831            ))
832        });
833    }
834    if !all.is_empty() {
835        if let Ok(json) = serde_json::to_string_pretty(&all) {
836            let _ = std::fs::write(&dst, json);
837            tracing::info!(
838                "validate-requests: wrote {} entries to {} ({} from emitted requests)",
839                all.len(),
840                dst.display(),
841                emitted_violations.len()
842            );
843        }
844    }
845
846    // Round 46 (#79) — Srikanth on 0.3.190: "I see three different
847    // messages, is this message for 3 different requests or for 1
848    // request. if it is 1 request can we have 1 line item mentioning
849    // violation 1 = message1, violation2 = message2 etc". Emit a
850    // sibling file grouped by (check_name, method, path) so each
851    // wire-level request shows up as a single row carrying every
852    // violation it raised. The per-violation file stays as-is for
853    // tooling that wants the flat shape.
854    let grouped_dst = output_dir.join("conformance-request-violations-by-request.json");
855    let grouped_value = group_violations_by_request(&all);
856    if let Ok(json) = serde_json::to_string_pretty(&grouped_value) {
857        let _ = std::fs::write(&grouped_dst, json);
858    }
859
860    // Round 48 (#79) — Srikanth on 0.3.192: "Can I assume all this
861    // checks has some violation either in the incoming request or
862    // outgoing response if yes then how can I see all this violation
863    // individually? Do we have any other Logs pointing each of those
864    // so that I can fix in one go?" New per-probe drill-down file
865    // emits one row per (check_name, method, path) carrying its full
866    // flat violation list. Lets the user see EXACTLY what each probe
867    // pattern (body:json, schema:string, constraint:enum, etc.)
868    // surfaced rather than just the deduped union the
869    // by-request file shows.
870    let drill_dst = output_dir.join("conformance-request-violations-by-probe.json");
871    let drill_value = group_violations_by_probe(&all);
872    if let Ok(json) = serde_json::to_string_pretty(&drill_value) {
873        let _ = std::fs::write(&drill_dst, json);
874    }
875    Ok(emitted_violations.len())
876}
877
878/// Round 48 (#79) — emit one entry per (check_name, method, path)
879/// with its full violation list. Unlike `group_violations_by_request`,
880/// this preserves the per-probe view so the user can see WHICH spec-
881/// probing pattern (body:json / schema:string / constraint:enum /
882/// method:POST / etc.) surfaced WHICH violation. Sorted by check_name
883/// within the same (method, path) so probes group together visually.
884fn group_violations_by_probe(flat: &[serde_json::Value]) -> serde_json::Value {
885    use serde_json::{Map, Value};
886
887    let mut by_probe_order: Vec<(String, String, String)> = Vec::new();
888    let mut by_probe: std::collections::HashMap<(String, String, String), Vec<(String, String)>> =
889        std::collections::HashMap::new();
890
891    // Round 50 (#79) — Srikanth on 0.3.194: "I see same violation is
892    // getting printed in logs for 22 times" on a multi-iteration run.
893    // The self-test capture holds one probe per iteration, so a 22x
894    // duration run feeds 22 byte-identical violations per probe into
895    // this flat list and we used to append all 22. Dedup identical
896    // (violation_type, message) pairs WITHIN a probe so each unique
897    // violation shows exactly once regardless of iteration count.
898    let mut seen_in_probe: std::collections::HashSet<(String, String, String, String)> =
899        std::collections::HashSet::new();
900    for v in flat {
901        let check = v.get("check_name").and_then(|x| x.as_str()).unwrap_or("").to_string();
902        let method = v.get("method").and_then(|x| x.as_str()).unwrap_or("").to_string();
903        let path = v.get("path").and_then(|x| x.as_str()).unwrap_or("").to_string();
904        let vt = v.get("violation_type").and_then(|x| x.as_str()).unwrap_or("").to_string();
905        let msg = v.get("message").and_then(|x| x.as_str()).unwrap_or("").to_string();
906        let key = (check.clone(), method.clone(), path.clone());
907        if !by_probe.contains_key(&key) {
908            by_probe_order.push(key.clone());
909        }
910        if seen_in_probe.insert((check, method, path, format!("{vt}\u{0}{msg}"))) {
911            by_probe.entry(key).or_default().push((vt, msg));
912        }
913    }
914
915    // Sort within same (method, path) by check_name for visual grouping.
916    by_probe_order.sort_by(|a, b| a.1.cmp(&b.1).then(a.2.cmp(&b.2)).then(a.0.cmp(&b.0)));
917
918    let mut rows: Vec<Value> = Vec::with_capacity(by_probe_order.len());
919    for key in &by_probe_order {
920        let (check, method, path) = key;
921        let entries = by_probe.get(key).cloned().unwrap_or_default();
922        let mut row = Map::new();
923        row.insert("check_name".into(), Value::String(check.clone()));
924        row.insert("method".into(), Value::String(method.clone()));
925        row.insert("path".into(), Value::String(path.clone()));
926        row.insert(
927            "violation_count".into(),
928            Value::Number(serde_json::Number::from(entries.len())),
929        );
930        for (i, (vt, msg)) in entries.iter().enumerate() {
931            let mut entry = Map::new();
932            entry.insert("violation_type".into(), Value::String(vt.clone()));
933            entry.insert("message".into(), Value::String(msg.clone()));
934            row.insert(format!("violation_{}", i + 1), Value::Object(entry));
935        }
936        rows.push(Value::Object(row));
937    }
938    Value::Array(rows)
939}
940
941/// Round 46–50 (#79) — collapse the flat list of
942/// [`RequestViolation`]-shaped JSON values into exactly ONE entry per
943/// `(method, path)`.
944///
945/// History: Round 46 keyed on `(check_name, method, path)` (too many
946/// duplicate rows). Round 47 collapsed by `(method, path)` AND the
947/// violation set, listing contributing checks in a `checks: [...]`
948/// array. But that re-split a single URL whenever two probe families
949/// produced DIFFERENT violation sets for it — Srikanth on 0.3.194:
950/// `owasp:ldap-injection` (query violations) landed in a different
951/// by-request row than the `request-body:*` checks for the very same
952/// URL, so his triage flow ("find the URL with the most violations
953/// here, then drill into by-probe") missed half the picture.
954///
955/// Round 50 makes this file the authoritative per-URL overview: one row
956/// per `(method, path)` carrying the DEDUPED UNION of every violation
957/// and every contributing `check_name`. The per-probe attribution
958/// ("which check surfaced which violation") lives in the sibling
959/// `conformance-request-violations-by-probe.json`. First-seen order is
960/// preserved for both checks and violations so the output is stable.
961fn group_violations_by_request(flat: &[serde_json::Value]) -> serde_json::Value {
962    use serde_json::{Map, Value};
963
964    let mut order: Vec<(String, String)> = Vec::new();
965    let mut checks_by_key: std::collections::HashMap<(String, String), Vec<String>> =
966        std::collections::HashMap::new();
967    let mut viols_by_key: std::collections::HashMap<(String, String), Vec<(String, String)>> =
968        std::collections::HashMap::new();
969    // Per-(method,path) dedup sets so a check fired across 22 iterations,
970    // or the same (vt,msg) surfaced by several checks, is counted once.
971    let mut seen_check: std::collections::HashSet<(String, String, String)> =
972        std::collections::HashSet::new();
973    let mut seen_viol: std::collections::HashSet<(String, String, String)> =
974        std::collections::HashSet::new();
975
976    for v in flat {
977        let check = v.get("check_name").and_then(|x| x.as_str()).unwrap_or("").to_string();
978        let method = v.get("method").and_then(|x| x.as_str()).unwrap_or("").to_string();
979        let path = v.get("path").and_then(|x| x.as_str()).unwrap_or("").to_string();
980        let vt = v.get("violation_type").and_then(|x| x.as_str()).unwrap_or("").to_string();
981        let msg = v.get("message").and_then(|x| x.as_str()).unwrap_or("").to_string();
982        let key = (method.clone(), path.clone());
983        if !checks_by_key.contains_key(&key) && !viols_by_key.contains_key(&key) {
984            order.push(key.clone());
985        }
986        if !check.is_empty() && seen_check.insert((method.clone(), path.clone(), check.clone())) {
987            checks_by_key.entry(key.clone()).or_default().push(check);
988        }
989        if seen_viol.insert((method.clone(), path.clone(), format!("{vt}\u{0}{msg}"))) {
990            viols_by_key.entry(key).or_default().push((vt, msg));
991        }
992    }
993
994    let mut rows: Vec<Value> = Vec::with_capacity(order.len());
995    for key in &order {
996        let (method, path) = key;
997        let checks = checks_by_key.get(key).cloned().unwrap_or_default();
998        let viols = viols_by_key.get(key).cloned().unwrap_or_default();
999        let mut row = Map::new();
1000        row.insert(
1001            "checks".into(),
1002            Value::Array(checks.iter().map(|s| Value::String(s.clone())).collect()),
1003        );
1004        // Round 48 (#79) — keep a single representative `check_name`
1005        // pointing at the check whose family matches the FIRST violation,
1006        // so the headline check isn't misleading. The full set is in
1007        // `checks[]`; per-violation attribution is in the by-probe file.
1008        let dominant_prefix: &str = viols
1009            .first()
1010            .map(|(vt, _)| {
1011                if vt.starts_with("query_") {
1012                    "param:query"
1013                } else if vt.starts_with("body_") {
1014                    "body:"
1015                } else if vt.starts_with("path_") {
1016                    "param:path"
1017                } else if vt.starts_with("header_") {
1018                    "param:header"
1019                } else {
1020                    ""
1021                }
1022            })
1023            .unwrap_or("");
1024        let best_check = if !dominant_prefix.is_empty() {
1025            checks
1026                .iter()
1027                .find(|c| c.starts_with(dominant_prefix))
1028                .cloned()
1029                .or_else(|| checks.first().cloned())
1030                .unwrap_or_default()
1031        } else {
1032            checks.first().cloned().unwrap_or_default()
1033        };
1034        row.insert("check_name".into(), Value::String(best_check));
1035        row.insert("method".into(), Value::String(method.clone()));
1036        row.insert("path".into(), Value::String(path.clone()));
1037        row.insert("violation_count".into(), Value::Number(serde_json::Number::from(viols.len())));
1038        for (i, (vt, msg)) in viols.iter().enumerate() {
1039            let mut entry = Map::new();
1040            entry.insert("violation_type".into(), Value::String(vt.clone()));
1041            entry.insert("message".into(), Value::String(msg.clone()));
1042            row.insert(format!("violation_{}", i + 1), Value::Object(entry));
1043        }
1044        rows.push(Value::Object(row));
1045    }
1046    Value::Array(rows)
1047}
1048
1049/// Round 45 (#79) — shallow body-vs-schema check for the retroactive
1050/// emitted-request validator. Pushes a [`RequestViolation`] for each
1051/// missing top-level `required` field and for each direct property
1052/// that fails an `enum` / type check. Intentionally does NOT recurse
1053/// into nested objects or follow `$ref` — the server-side validator is
1054/// authoritative there; this client-side pass only mirrors the obvious
1055/// wire-level breaks the bench actually fired.
1056fn check_body_against_schema(
1057    check: &str,
1058    method: &str,
1059    url: &str,
1060    body: &serde_json::Value,
1061    schema: &openapiv3::Schema,
1062    violations: &mut Vec<RequestViolation>,
1063) {
1064    use openapiv3::{SchemaKind, Type};
1065
1066    let SchemaKind::Type(Type::Object(obj_type)) = &schema.schema_kind else {
1067        return;
1068    };
1069    let Some(body_obj) = body.as_object() else {
1070        return;
1071    };
1072
1073    for required in &obj_type.required {
1074        if !body_obj.contains_key(required) {
1075            violations.push(RequestViolation {
1076                check_name: check.to_string(),
1077                method: method.to_string(),
1078                path: url.to_string(),
1079                violation_type: "body_missing_required".to_string(),
1080                message: format!("body.{}: required field missing", required),
1081            });
1082        }
1083    }
1084
1085    for (prop_name, prop_ref) in &obj_type.properties {
1086        let Some(value) = body_obj.get(prop_name) else {
1087            continue;
1088        };
1089        let Some(prop_schema) = prop_ref.as_item() else {
1090            continue;
1091        };
1092        if let Some(value_str) = value.as_str() {
1093            if let Some(msg) = check_value_against_schema(value_str, prop_schema) {
1094                violations.push(RequestViolation {
1095                    check_name: check.to_string(),
1096                    method: method.to_string(),
1097                    path: url.to_string(),
1098                    violation_type: "body_value_mismatch".to_string(),
1099                    message: format!("body.{}: {}", prop_name, msg),
1100                });
1101            }
1102        }
1103    }
1104}
1105
1106/// Round 44 (#79) — minimal value-vs-schema check for the retroactive
1107/// emitted-request validator. Returns a human-readable error message
1108/// when the value doesn't satisfy the schema, or `None` when it does.
1109/// Only handles the rules Srikanth's Apigee spec uses (enum, type:
1110/// integer, type: boolean); falls through silently for any other
1111/// rule rather than producing a false positive.
1112fn check_value_against_schema(value: &str, schema: &openapiv3::Schema) -> Option<String> {
1113    use openapiv3::{SchemaKind, Type};
1114
1115    let SchemaKind::Type(t) = &schema.schema_kind else {
1116        return None;
1117    };
1118    match t {
1119        Type::String(s) => {
1120            if !s.enumeration.is_empty() {
1121                let allowed: Vec<String> = s.enumeration.iter().filter_map(|e| e.clone()).collect();
1122                if !allowed.iter().any(|a| a == value) {
1123                    let quoted: Vec<String> =
1124                        allowed.iter().map(|a| format!("\"{}\"", a)).collect();
1125                    return Some(format!(
1126                        "value \"{}\" is not one of {}",
1127                        value,
1128                        quoted.join(" or ")
1129                    ));
1130                }
1131            }
1132            None
1133        }
1134        Type::Integer(_) => {
1135            if value.parse::<i64>().is_err() {
1136                Some(format!("value \"{}\" is not of type \"integer\"", value))
1137            } else {
1138                None
1139            }
1140        }
1141        Type::Number(_) => {
1142            if value.parse::<f64>().is_err() {
1143                Some(format!("value \"{}\" is not of type \"number\"", value))
1144            } else {
1145                None
1146            }
1147        }
1148        Type::Boolean(_) => match value {
1149            "true" | "false" => None,
1150            _ => Some(format!("value \"{}\" is not of type \"boolean\"", value)),
1151        },
1152        _ => None,
1153    }
1154}
1155
#[cfg(test)]
mod grouping_tests {
    use super::{group_violations_by_probe, group_violations_by_request};
    use serde_json::json;

    /// One flat violation record with the five string fields the grouping
    /// functions read.
    fn viol(check: &str, method: &str, path: &str, vt: &str, msg: &str) -> serde_json::Value {
        json!({
            "check_name": check,
            "method": method,
            "path": path,
            "violation_type": vt,
            "message": msg,
        })
    }

    /// The same violation repeated once per iteration of a 22-iteration run.
    fn repeated_22(record: serde_json::Value) -> Vec<serde_json::Value> {
        vec![record; 22]
    }

    /// Round 50 (#79) — a URL whose query violations come from
    /// `owasp:ldap-injection` and whose body violations come from a
    /// `request-body:*` check must yield ONE by-request row listing both
    /// check families and the union of all violations, not one row per
    /// family.
    #[test]
    fn by_request_unions_all_checks_for_a_url() {
        let path = "https://host/v1/organizations?alt=test-value&prettyPrint=test-value";
        let body_violation = viol(
            "request-body:type-mismatch:billingType",
            "POST",
            path,
            "body_type_mismatch",
            "body.billingType: expected string",
        );
        let alt_violation = viol(
            "owasp:ldap-injection",
            "POST",
            path,
            "query_value_mismatch",
            "query.alt: value \"test-value\" is not one of \"json\" or \"media\"",
        );
        let pretty_print_violation = viol(
            "owasp:ldap-injection",
            "POST",
            path,
            "query_value_mismatch",
            "query.prettyPrint: value \"test-value\" is not of type \"boolean\"",
        );
        let flat = vec![body_violation, alt_violation, pretty_print_violation];

        let out = group_violations_by_request(&flat);
        let rows = out.as_array().expect("array");
        // Exactly one row for the URL — no fragmentation.
        assert_eq!(rows.len(), 1, "expected a single by-request row per URL");

        let row = &rows[0];
        assert_eq!(row["violation_count"], 3);

        let checks: Vec<&str> = row["checks"]
            .as_array()
            .unwrap()
            .iter()
            .map(|entry| entry.as_str().unwrap())
            .collect();
        assert!(checks.contains(&"owasp:ldap-injection"), "owasp check must appear: {checks:?}");
        assert!(
            checks.iter().any(|c| c.starts_with("request-body:")),
            "body check must appear: {checks:?}"
        );
    }

    /// Round 50 (#79) — a multi-iteration run feeds N identical violations
    /// per probe; the by-probe drill-down must show each unique violation
    /// exactly once.
    #[test]
    fn by_probe_dedups_repeated_iterations() {
        let flat = repeated_22(viol(
            "owasp:ldap-injection",
            "POST",
            "https://host/v1/organizations?alt=test-value",
            "query_value_mismatch",
            "query.alt: value \"test-value\" is not one of \"json\" or \"media\"",
        ));

        let out = group_violations_by_probe(&flat);
        let rows = out.as_array().expect("array");
        assert_eq!(rows.len(), 1, "one probe row");

        let row = &rows[0];
        assert_eq!(row["violation_count"], 1, "22 identical iterations collapse to 1");
        assert!(row.get("violation_1").is_some());
        assert!(row.get("violation_2").is_none(), "no duplicate violation_2");
    }

    /// The by-request union must collapse repeated iterations as well, both
    /// in the violation list and in the `checks` array.
    #[test]
    fn by_request_dedups_repeated_iterations() {
        let flat = repeated_22(viol(
            "request-body:type-mismatch:name",
            "POST",
            "https://host/v1/widgets",
            "body_type_mismatch",
            "body.name: expected string",
        ));

        let out = group_violations_by_request(&flat);
        let rows = out.as_array().unwrap();
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["violation_count"], 1, "duplicate iterations collapse");
        assert_eq!(rows[0]["checks"].as_array().unwrap().len(), 1, "the same check listed once");
    }

    /// Distinct URLs stay distinct.
    #[test]
    fn by_request_keeps_distinct_urls_separate() {
        let flat = [
            viol("c1", "POST", "https://host/a", "body_type_mismatch", "a"),
            viol("c2", "GET", "https://host/b", "query_value_mismatch", "b"),
        ];

        let out = group_violations_by_request(&flat);
        assert_eq!(out.as_array().unwrap().len(), 2);
    }
}