// parlov-output 0.8.0
//
// Output formatters for parlov: SARIF, terminal table, and raw JSON.
// Documentation
//! Internal SARIF construction helpers.
//!
//! Every item is at most `pub(crate)`; nothing here is part of the public API.

use std::collections::BTreeMap;
use std::fmt::Write as _;

use parlov_core::{finding_id, EndpointVerdict, OracleResult, OracleVerdict};
use serde_json::json;
use serde_sarif::sarif::{
    ArtifactLocation, Location, Message, MultiformatMessageString, PhysicalLocation, PropertyBag,
    ReportingDescriptor, Result as SarifResult, ResultLevel, Run, Sarif, Tool, ToolComponent,
};

use crate::ScanFinding;

/// Returns `url` with a single leading `https://` or `http://` removed.
///
/// Only one prefix is stripped, `https://` being tried first. Matching is
/// case-sensitive; any input that starts with neither prefix is returned
/// unchanged (as an owned copy).
pub(crate) fn host_path(url: &str) -> String {
    for scheme in ["https://", "http://"] {
        if let Some(remainder) = url.strip_prefix(scheme) {
            return remainder.to_owned();
        }
    }
    url.to_owned()
}

/// Maps a `u8` confidence onto a severity score by dividing it by ten.
///
/// NOTE(review): presumably `confidence` is a 0–100 percentage so the result
/// lands in 0.0–10.0 — no clamping is performed here; confirm against callers.
fn security_severity(confidence: u8) -> f64 {
    let raw = f64::from(confidence);
    raw / 10.0
}

/// Renders the result's `vector` via its `Display` output, or `"Unknown"` when it is absent.
fn vector_str(result: &OracleResult) -> String {
    if let Some(vector) = &result.vector {
        vector.to_string()
    } else {
        "Unknown".to_owned()
    }
}

/// Wraps a key/value map in a SARIF `PropertyBag` as its additional properties.
fn make_property_bag(props: BTreeMap<String, serde_json::Value>) -> PropertyBag {
    let builder = PropertyBag::builder();
    builder.additional_properties(props).build()
}

pub(crate) fn build_rule(result: &OracleResult) -> ReportingDescriptor {
    let technique_id = result.technique_id.as_deref().unwrap_or("unknown");
    let oracle_class = result.class.to_string();
    let mut props = BTreeMap::new();
    props.insert("oracle_class".to_owned(), json!(oracle_class));
    props.insert("vector".to_owned(), json!(vector_str(result)));
    props.insert(
        "security-severity".to_owned(),
        json!(format!("{:.1}", security_severity(result.confidence))),
    );
    let short_desc = MultiformatMessageString::builder()
        .text(
            result
                .label
                .as_deref()
                .unwrap_or("HTTP differential oracle")
                .to_owned(),
        )
        .build();
    ReportingDescriptor::builder()
        .id(technique_id)
        .name(format!("{oracle_class}Oracle"))
        .short_description(short_desc)
        .properties(make_property_bag(props))
        .build()
}

/// Removes duplicate rules by id, keeping the first occurrence of each id.
///
/// The returned vector is ordered by rule id, not by input order.
///
/// Sort by id then dedup — avoids per-element String clone that the `HashSet` approach required.
pub(crate) fn deduplicate_rules(mut rules: Vec<ReportingDescriptor>) -> Vec<ReportingDescriptor> {
    // `sort_by` is stable, so rules sharing an id stay in their input order;
    // this is what makes "first occurrence wins" hold after the dedup below.
    rules.sort_by(|lhs, rhs| lhs.id.cmp(&rhs.id));
    // `dedup_by` drops the later element of each equal adjacent pair.
    rules.dedup_by(|later, earlier| later.id == earlier.id);
    rules
}

/// Borrowed inputs needed to turn one oracle result into a SARIF result.
pub(crate) struct ResultContext<'a> {
    /// URL of the probed target; used as the result's artifact location URI
    /// and as an input to its fingerprints.
    pub(crate) target_url: &'a str,
    /// The oracle result being reported.
    pub(crate) result: &'a OracleResult,
    /// Identifier of the strategy that produced the result; feeds the finding id.
    pub(crate) strategy_id: &'a str,
    /// Method string for the finding (presumably the HTTP method — confirm with
    /// callers); feeds the finding id and is recorded in the result properties.
    pub(crate) method: &'a str,
}

/// Converts one oracle result into a SARIF result.
///
/// Returns `None` for `NotPresent` verdicts. Otherwise the verdict maps to a
/// level (`Confirmed` → error, `Likely` → warning, `Inconclusive` → note), the
/// message is the result's `leaks` text or, failing that, its primary
/// evidence, and two fingerprints are attached:
///
/// - `oracleFingerprint/v1`: the value returned by `finding_id`.
/// - `techniqueTargetHash/v1` (partial): `<technique id>:<URL without scheme>`.
pub(crate) fn build_sarif_result(ctx: &ResultContext<'_>) -> Option<SarifResult> {
    let oracle = ctx.result;
    let level = match oracle.verdict {
        OracleVerdict::NotPresent => return None,
        OracleVerdict::Confirmed => ResultLevel::Error,
        OracleVerdict::Likely => ResultLevel::Warning,
        OracleVerdict::Inconclusive => ResultLevel::Note,
    };

    let rule_id = oracle.technique_id.as_deref().unwrap_or("unknown");
    let class_name = oracle.class.to_string();
    let fingerprint = finding_id(
        rule_id,
        ctx.target_url,
        &class_name,
        ctx.method,
        ctx.strategy_id,
    );
    let summary = oracle
        .leaks
        .as_deref()
        .unwrap_or_else(|| oracle.primary_evidence());

    let fingerprints = BTreeMap::from([("oracleFingerprint/v1".to_owned(), fingerprint)]);
    let partial_fingerprints = BTreeMap::from([(
        "techniqueTargetHash/v1".to_owned(),
        format!("{}:{}", rule_id, host_path(ctx.target_url)),
    )]);

    let sarif_result = SarifResult::builder()
        .rule_id(rule_id)
        .level(level)
        .message(Message::builder().text(summary.to_owned()).build())
        .locations(vec![build_location(ctx.target_url)])
        .fingerprints(fingerprints)
        .partial_fingerprints(partial_fingerprints)
        .related_locations(build_related_locations(oracle))
        .properties(build_result_properties(oracle, ctx.method))
        .build();
    Some(sarif_result)
}

/// Builds a SARIF location whose physical artifact URI is `target_url`.
fn build_location(target_url: &str) -> Location {
    Location::builder()
        .physical_location(
            PhysicalLocation::builder()
                .artifact_location(
                    ArtifactLocation::builder()
                        .uri(target_url.to_owned())
                        .build(),
                )
                .build(),
        )
        .build()
}

/// Produces one message-only related location per signal on the result.
///
/// Each message reads `[<kind>] <evidence>`, with ` (<rfc basis>)` appended
/// when the signal has one. The location id is the signal's index in
/// `result.signals`, or `0` if that index does not fit in an `i64`.
fn build_related_locations(result: &OracleResult) -> Vec<Location> {
    let mut related = Vec::new();
    for (index, signal) in result.signals.iter().enumerate() {
        let mut text = format!("[{}] {}", signal.kind, signal.evidence);
        if let Some(rfc) = &signal.rfc_basis {
            // Formatting into a `String` cannot fail; the result is ignored on purpose.
            let _ = write!(text, " ({rfc})");
        }
        let location = Location::builder()
            .id(i64::try_from(index).unwrap_or(0))
            .message(Message::builder().text(text).build())
            .build();
        related.push(location);
    }
    related
}

/// Builds the per-result property bag.
///
/// Always contains `oracle_class`, `verdict`, `confidence`, and `method`.
/// `impact_class` is added when the result has one, and `reasons` when the
/// list is non-empty.
fn build_result_properties(result: &OracleResult, method: &str) -> PropertyBag {
    let mut entries = BTreeMap::from([
        ("oracle_class".to_owned(), json!(result.class.to_string())),
        ("verdict".to_owned(), json!(result.verdict.to_string())),
        ("confidence".to_owned(), json!(result.confidence)),
        ("method".to_owned(), json!(method)),
    ]);
    if let Some(impact) = result.impact_class {
        entries.insert("impact_class".to_owned(), json!(impact.to_string()));
    }
    let has_reasons = !result.reasons.is_empty();
    if has_reasons {
        entries.insert("reasons".to_owned(), json!(result.reasons));
    }
    make_property_bag(entries)
}

/// Assembles a SARIF 2.1.0 document containing a single run.
///
/// The tool driver is named `parlov`, carries this crate's package version,
/// and owns `rules`. `results` and the optional `run_properties` are attached
/// to the run.
pub(crate) fn build_sarif_document(
    rules: Vec<ReportingDescriptor>,
    results: Vec<SarifResult>,
    run_properties: Option<PropertyBag>,
) -> Sarif {
    let tool_component = ToolComponent::builder()
        .name("parlov")
        .version(env!("CARGO_PKG_VERSION"))
        .rules(rules)
        .build();
    let runs = vec![build_run(tool_component, results, run_properties)];
    Sarif::builder()
        .schema(serde_sarif::sarif::SCHEMA_URL.to_owned())
        .version(json!("2.1.0"))
        .runs(runs)
        .build()
}

/// Builds a SARIF run from the tool driver and results, attaching `props`
/// as the run's property bag only when one is supplied.
fn build_run(driver: ToolComponent, results: Vec<SarifResult>, props: Option<PropertyBag>) -> Run {
    // The builder changes type with each setter, so the optional field is
    // applied in a branch rather than conditionally on one binding.
    let base = Run::builder().tool(Tool::from(driver)).results(results);
    match props {
        Some(bag) => base.properties(bag).build(),
        None => base.build(),
    }
}

/// Builds the run-level property bag summarising an endpoint verdict.
///
/// Always contains `target_url`, `endpoint_verdict`, `posterior_probability`,
/// `strategies_run`, `strategies_total`, and `observability_status`.
/// `stop_reason` is added when the verdict has one, and `operator_action`
/// when the verdict has a block summary that carries one.
pub(crate) fn build_verdict_run_properties(
    target_url: &str,
    verdict: &EndpointVerdict,
) -> PropertyBag {
    let mut entries = BTreeMap::from([
        ("target_url".to_owned(), json!(target_url)),
        (
            "endpoint_verdict".to_owned(),
            json!(verdict.verdict.to_string()),
        ),
        (
            "posterior_probability".to_owned(),
            json!(verdict.posterior_probability),
        ),
        ("strategies_run".to_owned(), json!(verdict.strategies_run)),
        (
            "strategies_total".to_owned(),
            json!(verdict.strategies_total),
        ),
        (
            "observability_status".to_owned(),
            json!(verdict.observability_status.to_string()),
        ),
    ]);

    if let Some(reason) = verdict.stop_reason.as_ref() {
        entries.insert("stop_reason".to_owned(), json!(reason.to_string()));
    }

    let operator_action = verdict
        .block_summary
        .as_ref()
        .and_then(|summary| summary.operator_action.as_ref());
    if let Some(action) = operator_action {
        entries.insert("operator_action".to_owned(), json!(action));
    }

    make_property_bag(entries)
}

/// Converts scan findings into SARIF results.
///
/// A finding is skipped when both the endpoint-level verdict (if supplied)
/// and the finding's own verdict are `NotPresent`. Findings for which
/// `build_sarif_result` yields `None` are skipped as well.
///
/// Every emitted result has its `properties` bag extended with the finding's
/// `probe` and `exchange` blocks, plus `repro` (the baseline and probe curl
/// commands) and `chain_provenance` when the finding carries them. These are
/// not standard SARIF fields, so they live in the `properties` extension point.
pub(crate) fn findings_to_results(
    target_url: &str,
    findings: &[ScanFinding],
    verdict: Option<&EndpointVerdict>,
) -> Vec<SarifResult> {
    // The endpoint-level half of the skip condition is the same for every finding.
    let endpoint_not_present =
        verdict.is_some_and(|ev| ev.verdict == OracleVerdict::NotPresent);

    findings
        .iter()
        .filter(|finding| {
            !(endpoint_not_present && finding.result.verdict == OracleVerdict::NotPresent)
        })
        .filter_map(|finding| {
            let ctx = ResultContext {
                target_url,
                result: &finding.result,
                strategy_id: &finding.strategy_id,
                method: &finding.method,
            };
            let sarif = build_sarif_result(&ctx)?;
            Some(attach_finding_extras(sarif, finding))
        })
        .collect()
}

/// Adds finding-level extras to a SARIF result's property bag.
///
/// The bag is created if the result has none. `probe` and `exchange` are
/// always written; `repro` (baseline and probe curl commands) and
/// `chain_provenance` are written only when the finding carries them.
/// Existing keys with the same names are overwritten.
fn attach_finding_extras(mut result: SarifResult, finding: &ScanFinding) -> SarifResult {
    let extras = &mut result
        .properties
        .get_or_insert_with(PropertyBag::default)
        .additional_properties;

    if let Some(info) = &finding.repro {
        extras.insert(
            "repro".to_owned(),
            json!({ "baseline_curl": info.baseline_curl, "probe_curl": info.probe_curl }),
        );
    }
    extras.insert("probe".to_owned(), json!(&finding.probe));
    extras.insert("exchange".to_owned(), json!(&finding.exchange));
    if let Some(provenance) = &finding.chain_provenance {
        extras.insert("chain_provenance".to_owned(), json!(provenance));
    }

    result
}

#[cfg(test)]
#[path = "sarif_builder_tests.rs"]
mod tests;