// parlov 0.8.0 - Docs.rs

//! Existence oracle pipeline: URL substitution, adaptive probe collection, analysis, output.
//!
//! Called from `main` when the `existence` subcommand is dispatched. Receives parsed
//! `ExistenceArgs`, drives an adaptive sampling loop collecting baseline/probe pairs until
//! the analyzer reaches a verdict, then prints output to stdout in the requested format.

use bytes::Bytes;
use http::{HeaderMap, Method};
use parlov_analysis::existence::ExistenceAnalyzer;
use parlov_analysis::{Analyzer, SampleDecision};
use parlov_core::{
    always_applicable, DifferentialSet, Error, NormativeStrength, OracleClass, OracleResult,
    ProbeDefinition, SignalSurface, StrategyOutcome, Technique, Vector,
};
use parlov_output::{render_json, render_sarif, render_table};
use parlov_probe::http::HttpProbe;
use parlov_probe::Probe;
use uuid::Uuid;

use crate::cli::{ExistenceArgs, OutputFormat};
use crate::util::parse_headers;

/// Default technique for the CLI existence subcommand (status-code diff).
///
/// `collect_until_verdict` passes this constant to `collect_with_technique`, so every
/// run of the CLI subcommand is analysed under this technique.
const CLI_TECHNIQUE: Technique = Technique {
    // `id` and `name` carry the same literal values as `CLI_STRATEGY_ID` and
    // `CLI_STRATEGY_NAME`, which are handed to the JSON/SARIF renderers; keep them in sync.
    id: "existence-cli",
    name: "CLI existence probe",
    // Classified as an existence oracle driven by a status-code differential.
    oracle_class: OracleClass::Existence,
    vector: Vector::StatusCodeDiff,
    // NOTE(review): the meaning of the strength level and of the two weights below is
    // defined by `parlov_core` / the analyzer, not by this file — confirm there before tuning.
    strength: NormativeStrength::Should,
    normalization_weight: Some(0.2),
    inverted_signal_weight: None,
    method_relevant: false,
    parser_relevant: false,
    // Presumably means the technique is never filtered out by an applicability check —
    // verify against `parlov_core::always_applicable`.
    applicability: always_applicable,
    contradiction_surface: SignalSurface::Status,
};

/// Executes the `existence` subcommand end to end and prints the verdict to stdout.
///
/// Steps, in order:
/// 1. Pick the probe identifier: the one supplied on the command line, or a freshly
///    generated v4 UUID when none was given.
/// 2. Parse the HTTP method and the extra request headers.
/// 3. Build two probe definitions from the same target/method/headers/body template,
///    substituting the baseline identifier into one and the probe identifier into the other.
/// 4. Sample baseline/probe pairs via `collect_until_verdict` until the analyzer is done.
/// 5. Render the result in the requested `format` and print it.
///
/// The number of samples taken is decided by the analyzer, not by this function
/// (the original notes state one sample for same-status pairs and up to three for
/// differentials — analyzer behaviour, not enforced here).
///
/// # Errors
///
/// Propagates failures from method parsing, header parsing, probe collection, and
/// output rendering.
pub async fn run(args: ExistenceArgs, format: OutputFormat) -> Result<(), Error> {
    let probe_id = match args.probe_id {
        Some(explicit) => explicit,
        None => Uuid::new_v4().to_string(),
    };
    let target: &str = &args.target;
    let method_name: &str = &args.method;

    let method = parse_method(method_name)?;
    let headers = parse_headers(&args.headers)?;
    let body_template = args.body.as_deref();

    // Both definitions share everything except the identifier substituted for `{id}`.
    let baseline_def =
        build_probe_def(target, &args.baseline_id, &method, &headers, body_template);
    let probe_def = build_probe_def(target, &probe_id, &method, &headers, body_template);

    let verdict = collect_until_verdict(&baseline_def, &probe_def).await?;
    let rendered = format_result(format, target, method_name, &verdict)?;
    println!("{rendered}");
    Ok(())
}

/// Strategy identifier passed to the JSON and SARIF renderers by `format_result`.
/// Holds the same value as `CLI_TECHNIQUE.id`.
const CLI_STRATEGY_ID: &str = "existence-cli";
/// Human-readable strategy name passed to the JSON renderer by `format_result`.
/// Holds the same value as `CLI_TECHNIQUE.name`.
const CLI_STRATEGY_NAME: &str = "CLI existence probe";

fn format_result(
    format: OutputFormat,
    target: &str,
    method: &str,
    result: &OracleResult,
) -> Result<String, Error> {
    match format {
        OutputFormat::Table => Ok(render_table(result)),
        OutputFormat::Json => {
            render_json(target, result, CLI_STRATEGY_ID, CLI_STRATEGY_NAME, method)
                .map_err(Error::Serialization)
        }
        OutputFormat::Sarif => {
            render_sarif(target, result, CLI_STRATEGY_ID, method).map_err(Error::Serialization)
        }
    }
}

/// Builds one probe definition by substituting `id` for every `{id}` placeholder.
///
/// The placeholder is replaced in the target URL and, when a body template is present,
/// in the body as well. The identifier is inserted verbatim (no escaping is applied
/// here). Method and headers are cloned so the returned definition owns its data.
fn build_probe_def(
    target: &str,
    id: &str,
    method: &Method,
    headers: &HeaderMap,
    body_template: Option<&str>,
) -> ProbeDefinition {
    let url = target.replace("{id}", id);
    let body = substitute_body(body_template, id);
    let method = method.clone();
    let headers = headers.clone();

    ProbeDefinition {
        url,
        method,
        headers,
        body,
    }
}

/// Samples baseline/probe pairs with the default CLI technique and returns only the verdict.
///
/// Creates a fresh `HttpProbe`, delegates to `collect_with_technique` with
/// `CLI_TECHNIQUE`, and drops the `StrategyOutcome` and `DifferentialSet` that come back
/// alongside the result. Callers that need those extra values should call
/// `collect_with_technique` themselves.
///
/// # Errors
///
/// Returns whatever error `collect_with_technique` reports.
pub(crate) async fn collect_until_verdict(
    baseline_def: &ProbeDefinition,
    probe_def: &ProbeDefinition,
) -> Result<OracleResult, Error> {
    let http = HttpProbe::new();
    let collected = collect_with_technique(baseline_def, probe_def, CLI_TECHNIQUE, &http).await;
    // Keep the verdict; the outcome and the raw exchanges are not needed by CLI callers.
    collected.map(|(verdict, ..)| verdict)
}

/// Samples baseline/probe pairs under the given technique, without a canonical request.
///
/// Thin wrapper over `collect_with_technique_and_canonical` that always passes `None`
/// for the canonical definition, so each iteration issues exactly two requests.
///
/// Returns the `OracleResult`, the `StrategyOutcome`, and the accumulated
/// `DifferentialSet`; the latter lets callers inspect the collected exchanges without
/// re-issuing requests.
///
/// # Errors
///
/// Returns whatever error `collect_with_technique_and_canonical` reports.
pub(crate) async fn collect_with_technique(
    baseline_def: &ProbeDefinition,
    probe_def: &ProbeDefinition,
    technique: Technique,
    probe: &HttpProbe,
) -> Result<(OracleResult, StrategyOutcome, DifferentialSet), Error> {
    let no_canonical: Option<&ProbeDefinition> = None;
    collect_with_technique_and_canonical(baseline_def, probe_def, no_canonical, technique, probe)
        .await
}

/// Samples baseline/probe pairs until the analyzer declares the differential complete.
///
/// Each iteration executes the baseline and probe definitions concurrently, appends both
/// exchanges to the `DifferentialSet`, and asks `ExistenceAnalyzer` to evaluate the set.
/// The loop ends as soon as the analyzer returns `SampleDecision::Complete`; any other
/// decision triggers another iteration. There is no iteration cap in this function —
/// termination depends on the analyzer.
///
/// When `canonical_def` is `Some`, the first iteration (and only the first) additionally
/// executes the canonical definition concurrently with the pair and stores its exchange
/// in `DifferentialSet.canonical` before the analyzer runs. Per the original notes this
/// is intended for route-mutating strategies that need an unmutated reference response.
///
/// Returns the analyzer's result, its strategy outcome, and the accumulated set.
///
/// # Errors
///
/// Returns the first error produced by any of the concurrently executed requests.
pub(crate) async fn collect_with_technique_and_canonical(
    baseline_def: &ProbeDefinition,
    probe_def: &ProbeDefinition,
    canonical_def: Option<&ProbeDefinition>,
    technique: Technique,
    probe: &HttpProbe,
) -> Result<(OracleResult, StrategyOutcome, DifferentialSet), Error> {
    let analyzer = ExistenceAnalyzer;
    let mut samples = DifferentialSet {
        baseline: Vec::new(),
        probe: Vec::new(),
        canonical: None,
        technique,
    };
    // Consumed by the first iteration; `None` afterwards so later rounds send two requests.
    let mut pending_canonical = canonical_def;

    loop {
        let (baseline_exchange, probe_exchange) = match pending_canonical.take() {
            Some(def) => {
                let (canonical_exchange, baseline_exchange, probe_exchange) = tokio::try_join!(
                    probe.execute(def),
                    probe.execute(baseline_def),
                    probe.execute(probe_def),
                )?;
                samples.canonical = Some(canonical_exchange);
                (baseline_exchange, probe_exchange)
            }
            None => tokio::try_join!(probe.execute(baseline_def), probe.execute(probe_def),)?,
        };

        samples.baseline.push(baseline_exchange);
        samples.probe.push(probe_exchange);

        if let SampleDecision::Complete(result, outcome) = analyzer.evaluate(&samples) {
            return Ok((*result, outcome, samples));
        }
    }
}

fn parse_method(method_str: &str) -> Result<Method, Error> {
    method_str
        .parse::<Method>()
        .map_err(|e| Error::Http(format!("invalid HTTP method '{method_str}': {e}")))
}

/// Renders a request body from an optional template.
///
/// Returns `None` when there is no template; otherwise replaces every `{id}` occurrence
/// with `id` and returns the result as `Bytes`.
fn substitute_body(template: Option<&str>, id: &str) -> Option<Bytes> {
    let rendered = template?.replace("{id}", id);
    Some(Bytes::from(rendered))
}