parlov-elicit 0.5.0

//! `ScdProblemDetailsProducer` and `ScdProblemDetailsConsumer` — B5 structured-error chains.
//!
//! 4xx baseline with `application/problem+json` or `application/json` body →
//! harvest `errors[].field` names and `type` URI → generate a follow-up POST probe
//! with a minimum-valid body containing all required fields set to empty string.
//!
//! RFC 9457 makes error bodies explicitly machine-readable. `RESTler`, `EvoMaster`,
//! `Schemathesis`, and `WuppieFuzz` all treat error-response content as feedback.

use bytes::Bytes;
use http::{HeaderMap, Method};
use parlov_core::{
    always_applicable, NormativeStrength, OracleClass, ProbeDefinition, ResponseClass,
    SignalSurface, Technique, Vector,
};
use serde_json::Value;

use crate::chain::{Consumer, Producer, ProducerOutput, ProducerOutputKind};
use crate::context::ScanContext;
use crate::types::{ProbePair, ProbeSpec, RiskLevel, StrategyMetadata};
use crate::util::substitute_url;

/// Strategy metadata for the problem-details chain.
///
/// Cloned into `ProbePair::metadata` for every pair built by
/// `ScdProblemDetailsConsumer::generate`.
static METADATA: StrategyMetadata = StrategyMetadata {
    strategy_id: "scd-problem-details-chain",
    strategy_name: "Problem-Details Chain (4xx → repaired POST body)",
    risk: RiskLevel::Safe,
};

/// Technique descriptor for the problem-details chain.
///
/// Copied into `ProbePair::technique` for every pair built by
/// `ScdProblemDetailsConsumer::generate`. Declares an `Existence` oracle over the
/// `StatusCodeDiff` vector with `Should` normative strength.
static TECHNIQUE: Technique = Technique {
    id: "problem-details-chain",
    name: "Structured-error producer/consumer chain",
    oracle_class: OracleClass::Existence,
    vector: Vector::StatusCodeDiff,
    strength: NormativeStrength::Should,
    // No normalization or inverted-signal weighting is configured for this technique.
    normalization_weight: None,
    inverted_signal_weight: None,
    method_relevant: false,
    parser_relevant: false,
    applicability: always_applicable,
    contradiction_surface: SignalSurface::Body,
};

/// Extracts field names and type URI from 4xx RFC 9457 / JSON error bodies.
///
/// Admits only `StructuredError` (4xx with JSON content type). Does not override
/// `extract` — body content is required, so only `extract_with_body` is meaningful.
pub(super) struct ScdProblemDetailsProducer;

impl Producer for ScdProblemDetailsProducer {
    fn admits(&self, class: ResponseClass) -> bool {
        matches!(class, ResponseClass::StructuredError)
    }

    fn extract(&self, _class: ResponseClass, _headers: &HeaderMap) -> Option<ProducerOutput> {
        None
    }

    fn extract_with_body(
        &self,
        class: ResponseClass,
        headers: &HeaderMap,
        body: &Bytes,
    ) -> Option<ProducerOutput> {
        if !self.admits(class) {
            return None;
        }
        if !has_json_content_type(headers) {
            return None;
        }
        if body.is_empty() {
            return None;
        }
        let json: Value = serde_json::from_slice(body).ok()?;
        let required_fields = extract_error_fields(&json);
        let error_type = json
            .get("type")
            .and_then(|v| v.as_str())
            .map(ToOwned::to_owned);
        if required_fields.is_empty() && error_type.is_none() {
            return None;
        }
        Some(ProducerOutput::ProblemDetails {
            required_fields,
            error_type,
        })
    }
}

/// Turns harvested field names into one POST probe pair carrying a minimal
/// "repaired" body.
///
/// For a non-empty `required_fields` list, `generate` returns a single
/// `ProbeSpec::Pair`. Baseline and probe are both POST requests that share the
/// same JSON body (every required field set to the empty string) and the
/// context headers; their URLs are derived from `ctx.target` through
/// `substitute_url` with `ctx.baseline_id` and `ctx.probe_id` respectively.
/// Any other producer output, or an empty field list, yields no probes.
pub(super) struct ScdProblemDetailsConsumer;

impl Consumer for ScdProblemDetailsConsumer {
    /// This consumer is fed by `ProducerOutputKind::ProblemDetails` outputs.
    fn needs(&self) -> ProducerOutputKind {
        ProducerOutputKind::ProblemDetails
    }

    /// Builds the baseline/probe POST pair described on the type, or an empty
    /// vector when `output` carries no usable field names.
    fn generate(&self, ctx: &ScanContext, output: &ProducerOutput) -> Vec<ProbeSpec> {
        let fields = match output {
            ProducerOutput::ProblemDetails {
                required_fields, ..
            } if !required_fields.is_empty() => required_fields,
            _ => return Vec::new(),
        };

        // Both requests send the same payload; only the substituted id differs.
        let payload = build_minimal_body(fields);

        let baseline = ProbeDefinition {
            url: substitute_url(&ctx.target, &ctx.baseline_id),
            method: Method::POST,
            headers: ctx.headers.clone(),
            body: Some(payload.clone()),
        };
        let probe = ProbeDefinition {
            url: substitute_url(&ctx.target, &ctx.probe_id),
            method: Method::POST,
            headers: ctx.headers.clone(),
            body: Some(payload),
        };

        vec![ProbeSpec::Pair(ProbePair {
            baseline,
            probe,
            canonical_baseline: None,
            metadata: METADATA.clone(),
            technique: TECHNIQUE,
            chain_provenance: None,
        })]
    }
}

/// Returns `true` if `Content-Type` contains `application/problem+json` or
/// `application/json`.
///
/// The comparison is ASCII case-insensitive: media type and subtype names are
/// case-insensitive (RFC 9110 §8.3.1), so values such as `Application/JSON` or
/// `application/Problem+JSON; charset=utf-8` must be accepted too. Matching is
/// by substring, so trailing parameters do not prevent a match.
///
/// Returns `false` when the header is absent or its value cannot be read as a
/// string (`to_str` fails).
fn has_json_content_type(headers: &HeaderMap) -> bool {
    headers
        .get(http::header::CONTENT_TYPE)
        .and_then(|v| v.to_str().ok())
        .is_some_and(|ct| {
            // Normalize once so both needles can stay lowercase literals.
            let ct = ct.to_ascii_lowercase();
            ct.contains("application/problem+json") || ct.contains("application/json")
        })
}

/// Extracts field names from `errors[].field` entries in the parsed JSON value.
fn extract_error_fields(json: &Value) -> Vec<String> {
    let Some(errors) = json.get("errors").and_then(|v| v.as_array()) else {
        return vec![];
    };
    errors
        .iter()
        .filter_map(|entry| entry.get("field")?.as_str().map(ToOwned::to_owned))
        .collect()
}

/// Serializes a JSON object that maps each name in `fields` to the empty string.
///
/// Repeated names collapse into a single key. Should serialization fail, the
/// returned buffer is empty rather than an error.
fn build_minimal_body(fields: &[String]) -> Bytes {
    let mut object: serde_json::Map<String, Value> = serde_json::Map::new();
    for name in fields {
        object.insert(name.clone(), Value::String(String::new()));
    }
    let encoded = serde_json::to_vec(&Value::Object(object)).unwrap_or_default();
    Bytes::from(encoded)
}

// Unit tests live in the sibling file `problem_details_tests.rs` and are only
// compiled for test builds.
#[cfg(test)]
#[path = "problem_details_tests.rs"]
mod tests;