parlov-analysis 0.7.0

//! `ExistenceAnalyzer` — delegates classification to the scoring pipeline in `classifier`.

use parlov_core::{
    DifferentialSet, OracleClass, OracleResult, OracleVerdict, ProbeExchange, StrategyOutcome,
    Vector,
};

use crate::aggregation::modifiers::compute_modifiers;
use crate::signals;
use crate::{Analyzer, SampleDecision};

use super::classifier::classify;

/// Tracks which code path produced a result, so `classify_outcome` can map it correctly.
///
/// `evaluate` picks exactly one variant per completed decision; the variant (not the
/// result's verdict alone) decides which `StrategyOutcome` constructor is used.
#[derive(Clone, Copy)]
enum AnalysisPath {
    /// `is_relevant_differential` returned false — technique did not fire.
    /// The result comes from `not_fired_result`; no signal extraction is performed.
    NotFired,
    /// Responses were inconsistent across samples.
    /// The result comes from `unstable_result`; no signal extraction is performed.
    Unstable,
    /// Baseline status equals probe status — same-status short-circuit.
    /// `build_result` still runs on this path; the outcome is resolved by
    /// `same_status_outcome` rather than by the verdict.
    SameStatus,
    /// `build_result` ran — full signal extraction and scoring completed
    /// on a stable, relevant status differential; the outcome follows the verdict.
    FullyScored,
}

/// Analyzes a `DifferentialSet` for existence oracle signals via status-code differential.
///
/// Decision flow, as implemented by `evaluate` in this file:
///
/// - `NeedMore` is returned until both sides hold at least one exchange.
/// - Same-status pairs short-circuit after the first sample. The pair is still run
///   through signal extraction and `classifier::classify`, and the outcome is resolved
///   by `same_status_outcome`.
/// - A status differential is only classified once three samples have been collected,
///   so that stability can be confirmed first.
/// - Unstable differentials, and differentials that are not relevant to the technique,
///   receive a fixed `NotPresent` result and do NOT run the signal extractors.
/// - Every other differential runs all signal extractors and delegates scoring to
///   `classifier::classify`.
pub struct ExistenceAnalyzer;

impl Analyzer for ExistenceAnalyzer {
    /// Decides whether `data` holds enough samples to classify and, if so, classifies it.
    ///
    /// Returns `SampleDecision::NeedMore` when:
    /// - either side has no exchange yet, or
    /// - the first baseline and probe statuses differ and either side holds fewer than
    ///   three samples (stability cannot be confirmed yet).
    ///
    /// Otherwise returns `SampleDecision::Complete` with the boxed `OracleResult` and the
    /// `StrategyOutcome` derived from the path taken:
    /// - equal first statuses → `build_result` + `AnalysisPath::SameStatus`
    /// - inconsistent statuses on either side → `unstable_result` + `AnalysisPath::Unstable`
    /// - stable but irrelevant differential → `not_fired_result` + `AnalysisPath::NotFired`
    /// - stable, relevant differential → `build_result` + `AnalysisPath::FullyScored`
    fn evaluate(&self, data: &DifferentialSet) -> SampleDecision {
        // Samples required on EACH side before a status differential is judged stable.
        const MIN_STABILITY_SAMPLES: usize = 3;

        let (Some(first_baseline), Some(first_probe)) =
            (data.baseline.first(), data.probe.first())
        else {
            return SampleDecision::NeedMore;
        };
        let same_status = first_baseline.response.status == first_probe.response.status;

        // Both sides must reach the sample threshold. `is_consistent` is trivially true
        // for a one-element slice, so gating on the baseline alone would let an
        // under-sampled probe side be reported as stable.
        let under_sampled = data.baseline.len() < MIN_STABILITY_SAMPLES
            || data.probe.len() < MIN_STABILITY_SAMPLES;
        if !same_status && under_sampled {
            return SampleDecision::NeedMore;
        }

        let (result, path) = if same_status {
            (build_result(data), AnalysisPath::SameStatus)
        } else if !(is_consistent(&data.baseline) && is_consistent(&data.probe)) {
            (unstable_result(data), AnalysisPath::Unstable)
        } else if is_relevant_differential(data) {
            (build_result(data), AnalysisPath::FullyScored)
        } else {
            (not_fired_result(data), AnalysisPath::NotFired)
        };

        let outcome = classify_outcome(&result, path, &data.technique, data);
        SampleDecision::Complete(Box::new(result), outcome)
    }

    /// Identifies this analyzer as handling the existence oracle class.
    fn oracle_class(&self) -> OracleClass {
        OracleClass::Existence
    }
}

/// Translates the path taken by `evaluate`, together with its `OracleResult`, into a
/// `StrategyOutcome`.
///
/// - `SameStatus` is delegated to `same_status_outcome`.
/// - `NotFired` and `Unstable` always yield `NoSignal`.
/// - `FullyScored` yields `Positive` for `Confirmed`/`Likely` verdicts and `NoSignal`
///   for `NotPresent`/`Inconclusive`.
///
/// The result is cloned into the returned outcome; the caller keeps ownership of `result`.
fn classify_outcome(
    result: &OracleResult,
    path: AnalysisPath,
    technique: &parlov_core::Technique,
    differential: &DifferentialSet,
) -> StrategyOutcome {
    // Only the same-status path needs the technique and differential; hand it off early.
    let is_positive = match path {
        AnalysisPath::SameStatus => return same_status_outcome(result, technique, differential),
        AnalysisPath::NotFired | AnalysisPath::Unstable => false,
        AnalysisPath::FullyScored => match result.verdict {
            OracleVerdict::Confirmed | OracleVerdict::Likely => true,
            OracleVerdict::NotPresent | OracleVerdict::Inconclusive => false,
        },
    };

    let owned = result.clone();
    if is_positive {
        StrategyOutcome::Positive(owned)
    } else {
        StrategyOutcome::NoSignal(owned)
    }
}

/// Turns a same-status result into its `StrategyOutcome`.
///
/// Resolution order:
/// 1. No `normalization_weight` on the technique → `NoSignal`.
/// 2. `compute_modifiers` reports the technique as blocked → `Inapplicable(reason)`,
///    where `reason` is the reported block reason or the fallback `"modifier blocked"`.
///    Per the modifiers module this covers precondition gates such as auth-gate,
///    method-gate, parser-failure, or applicability-marker-missing.
/// 3. Otherwise → `Contradictory` carrying the base weight scaled by the combined
///    runtime [`EvidenceModifiers`](crate::aggregation::modifiers::EvidenceModifiers).
///    With all modifiers at 1.0 the effective weight equals the base weight exactly.
///
/// Positivity of the base and effective weights is checked with `debug_assert!` only,
/// so release builds do not enforce it.
fn same_status_outcome(
    result: &OracleResult,
    technique: &parlov_core::Technique,
    differential: &DifferentialSet,
) -> StrategyOutcome {
    let Some(base_weight) = technique.normalization_weight else {
        return StrategyOutcome::NoSignal(result.clone());
    };
    debug_assert!(
        base_weight > 0.0,
        "normalization_weight must be positive; got {base_weight} for {}",
        technique.id
    );

    let modifier_result = compute_modifiers(technique, differential);
    if modifier_result.is_blocked() {
        let reason = modifier_result
            .block_reason
            .map_or("modifier blocked", |r| r.as_str());
        return StrategyOutcome::Inapplicable(std::borrow::Cow::Borrowed(reason));
    }

    // The modifier total is narrowed to `f32` to match the weight type; the precision
    // loss is accepted here (presumably `total()` yields an `f64` — confirm).
    #[allow(clippy::cast_possible_truncation)]
    let scale = modifier_result.modifiers.total() as f32;
    let effective_weight = base_weight * scale;
    debug_assert!(
        effective_weight > 0.0,
        "effective weight must be positive after modifiers"
    );

    StrategyOutcome::Contradictory(result.clone(), effective_weight)
}

/// Scores the differential: reads the first baseline and probe status, extracts every
/// signal, and hands all of it to `classifier::classify` with the technique.
///
/// # Panics
///
/// Panics if `data.baseline` or `data.probe` is empty; callers must have checked that
/// both sides hold at least one exchange.
fn build_result(data: &DifferentialSet) -> OracleResult {
    let baseline_status = data.baseline[0].response.status;
    let probe_status = data.probe[0].response.status;

    classify(
        baseline_status,
        probe_status,
        extract_all_signals(data),
        &data.technique,
    )
}

/// Collects the signals of every extractor into a single vector.
///
/// Extractors run in a fixed order — status code, header, metadata, body — and each
/// appends to the same vector, so the returned signals keep that order.
fn extract_all_signals(data: &DifferentialSet) -> Vec<parlov_core::Signal> {
    let mut collected: Vec<parlov_core::Signal> = Vec::new();

    signals::status_code::extract_into(data, &mut collected);
    signals::header::extract_into(data, &mut collected);
    signals::metadata::extract_into(data, &mut collected);
    signals::body::extract_into(data, &mut collected);

    collected
}

/// Returns `true` when every exchange carries the same response status as the first.
///
/// An empty slice and a single-element slice are both consistent.
fn is_consistent(exchanges: &[ProbeExchange]) -> bool {
    let Some((anchor, rest)) = exchanges.split_first() else {
        return true;
    };
    rest.iter()
        .all(|exchange| exchange.response.status == anchor.response.status)
}

/// Builds the zero-confidence `NotPresent` result reported for an unstable differential.
///
/// The single attached signal names which side (baseline, probe, or both) returned
/// inconsistent statuses, in the form `unstable: <side>`.
///
/// # Panics
///
/// Panics if both sides are consistent; callers must only invoke this after finding at
/// least one inconsistent side.
fn unstable_result(data: &DifferentialSet) -> OracleResult {
    let baseline_unstable = !is_consistent(&data.baseline);
    let probe_unstable = !is_consistent(&data.probe);

    let which = if baseline_unstable && probe_unstable {
        "baseline and probe sides"
    } else if baseline_unstable {
        "baseline side"
    } else if probe_unstable {
        "probe side"
    } else {
        // INVARIANT: callers establish that at least one side is inconsistent before
        // calling, so reaching this branch means the invariant was broken.
        unreachable!("unstable_result called when both sides are stable")
    };

    let instability = parlov_core::Signal {
        kind: parlov_core::SignalKind::StatusCodeDiff,
        evidence: format!("unstable: {which}"),
        rfc_basis: None,
    };
    let technique = &data.technique;

    OracleResult {
        class: OracleClass::Existence,
        verdict: OracleVerdict::NotPresent,
        severity: None,
        confidence: 0,
        impact_class: None,
        reasons: vec![],
        signals: vec![instability],
        technique_id: Some(technique.id.to_string()),
        vector: Some(technique.vector),
        normative_strength: Some(technique.strength),
        label: None,
        leaks: None,
        rfc_basis: None,
    }
}

/// Reports whether the observed differential is one the generating technique can explain.
///
/// `RedirectDiff` probes rewrite URLs, which can provoke server behavior that has nothing
/// to do with redirects (e.g., `200 vs 412`). For that vector the differential only counts
/// when at least one of the two first statuses is a 3xx; otherwise the technique did not
/// fire and the pair must be dismissed instead of being scored against the general
/// pattern table. All other vectors are always considered relevant here.
///
/// # Panics
///
/// Panics if `data.baseline` or `data.probe` is empty.
fn is_relevant_differential(data: &DifferentialSet) -> bool {
    let baseline_status = data.baseline[0].response.status;
    let probe_status = data.probe[0].response.status;

    match data.technique.vector {
        // These vectors rely on the b0 == p0 short-circuit for "technique didn't fire."
        Vector::StatusCodeDiff | Vector::CacheProbing | Vector::ErrorMessageGranularity => true,
        Vector::RedirectDiff => {
            baseline_status.is_redirection() || probe_status.is_redirection()
        }
    }
}

/// Builds the zero-confidence `NotPresent` result for a technique that did not fire.
///
/// Intended for the case where `is_relevant_differential` returned `false`: a status
/// differential exists, but it is a by-product of how the probe was constructed rather
/// than the signal the technique looks for. The dismissal reason is recorded as signal
/// evidence so that reporting can surface it instead of silently dropping the result.
fn not_fired_result(data: &DifferentialSet) -> OracleResult {
    let dismissal = parlov_core::Signal {
        kind: parlov_core::SignalKind::StatusCodeDiff,
        evidence: "technique did not fire: no 3xx status in differential".to_owned(),
        rfc_basis: None,
    };
    let technique = &data.technique;

    OracleResult {
        class: OracleClass::Existence,
        verdict: OracleVerdict::NotPresent,
        severity: None,
        confidence: 0,
        impact_class: None,
        reasons: vec![],
        signals: vec![dismissal],
        technique_id: Some(technique.id.to_string()),
        vector: Some(technique.vector),
        normative_strength: Some(technique.strength),
        label: None,
        leaks: None,
        rfc_basis: None,
    }
}

// Unit tests are compiled only under `cfg(test)` and are loaded from `analyzer_tests.rs`.
#[cfg(test)]
#[path = "analyzer_tests.rs"]
mod tests;