// parlov-analysis 0.5.0
//
// Analysis engine trait and signal detection for parlov.
// Documentation
//! Body differential signal extractor.

use parlov_core::{DifferentialSet, Signal, SignalKind};

/// Maximum length, in bytes, a body may have for its content to be previewed in evidence.
///
/// The limit is checked against the baseline and the probe body individually; if either one
/// exceeds it, no preview is added.
const MAX_PREVIEW_LEN: usize = 500;

/// Derives body-related differential signals from a baseline/probe pair.
///
/// Only the last entry of `data.baseline` and the last entry of `data.probe` are inspected.
/// If either list is empty the result is an empty vector. Otherwise the result contains, in
/// this order, a `BodyDiff` signal when the two bodies differ and a further `BodyDiff` signal
/// when the two `Content-Type` header values differ.
#[must_use]
pub fn extract(data: &DifferentialSet) -> Vec<Signal> {
    let mut found = Vec::new();

    // Nothing can be compared unless both sides contributed at least one exchange.
    if let (Some(base), Some(probed)) = (data.baseline.last(), data.probe.last()) {
        let (base_resp, probe_resp) = (&base.response, &probed.response);

        extract_body_diff(&base_resp.body, &probe_resp.body, &mut found);
        extract_content_type_diff(&base_resp.headers, &probe_resp.headers, &mut found);
    }

    found
}

/// Pushes a `BodyDiff` signal onto `signals` when the two bodies are not byte-for-byte equal.
///
/// Equal bodies leave `signals` untouched. The evidence text comes from
/// `build_body_evidence`; `rfc_basis` is always `None`.
fn extract_body_diff(baseline_body: &[u8], probe_body: &[u8], signals: &mut Vec<Signal>) {
    if baseline_body != probe_body {
        signals.push(Signal {
            kind: SignalKind::BodyDiff,
            evidence: build_body_evidence(baseline_body, probe_body),
            rfc_basis: None,
        });
    }
}

/// Renders a human-readable description of how two bodies differ.
///
/// The first segment reports the lengths: either that both bodies have the same length but
/// different content, or the two differing lengths. A content preview segment may follow
/// (see `append_content_preview`). Segments are joined with `"; "`.
fn build_body_evidence(baseline: &[u8], probe: &[u8]) -> String {
    let (b_len, p_len) = (baseline.len(), probe.len());

    let summary = if b_len == p_len {
        format!("body content differs (same length: {b_len} bytes)")
    } else {
        format!("body length: {b_len} (baseline) vs {p_len} (probe)")
    };

    let mut segments = vec![summary];
    append_content_preview(baseline, probe, &mut segments);
    segments.join("; ")
}

/// Appends truncated body content previews when both sides are short UTF-8.
fn append_content_preview(
    baseline: &[u8],
    probe: &[u8],
    parts: &mut Vec<String>,
) {
    if baseline.len() > MAX_PREVIEW_LEN || probe.len() > MAX_PREVIEW_LEN {
        return;
    }
    let (Ok(b_str), Ok(p_str)) = (
        std::str::from_utf8(baseline),
        std::str::from_utf8(probe),
    ) else {
        return;
    };
    parts.push(format!("baseline: {b_str}, probe: {p_str}"));
}

/// Pushes a `BodyDiff` signal onto `signals` when both responses carry a `Content-Type`
/// value and the two values are not equal as strings.
///
/// No signal is produced when the header is missing on either side. A value that
/// `HeaderValue::to_str` rejects is treated the same as a missing header.
fn extract_content_type_diff(
    baseline_headers: &http::HeaderMap,
    probe_headers: &http::HeaderMap,
    signals: &mut Vec<Signal>,
) {
    /// Looks up `content-type` and returns it as `&str` when it converts cleanly.
    fn content_type(headers: &http::HeaderMap) -> Option<&str> {
        headers.get("content-type")?.to_str().ok()
    }

    let found = (content_type(baseline_headers), content_type(probe_headers));
    let (Some(b), Some(p)) = found else {
        return;
    };

    if b != p {
        signals.push(Signal {
            kind: SignalKind::BodyDiff,
            evidence: format!("content-type: {b} (baseline) vs {p} (probe)"),
            rfc_basis: None,
        });
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::tests::{fake_exchange_with_body, single_diff_set_with_bodies};

    /// Equal bodies must not yield any signal.
    #[test]
    fn identical_bodies_produce_no_signal() {
        let set = single_diff_set_with_bodies(403, 403, b"denied", b"denied");
        assert!(extract(&set).is_empty());
    }

    /// Bodies of different lengths yield one `BodyDiff` whose evidence reports both lengths.
    #[test]
    fn different_bodies_produce_body_diff_signal() {
        let set = single_diff_set_with_bodies(
            403,
            403,
            b"access denied",
            b"not found for this resource",
        );
        let found = extract(&set);
        assert_eq!(found.len(), 1);

        let signal = &found[0];
        assert_eq!(signal.kind, SignalKind::BodyDiff);
        for fragment in ["body length:", "13 (baseline)", "27 (probe)"] {
            assert!(signal.evidence.contains(fragment));
        }
    }

    /// Same-length bodies with different bytes are reported with the shared length.
    #[test]
    fn same_length_different_content_noted() {
        let set = single_diff_set_with_bodies(403, 403, b"abc", b"xyz");
        let found = extract(&set);
        assert_eq!(found.len(), 1);

        let signal = &found[0];
        assert!(signal.evidence.contains("same length: 3 bytes"));
    }

    /// Short UTF-8 bodies are echoed into the evidence as a preview.
    #[test]
    fn utf8_bodies_include_content_preview() {
        let set = single_diff_set_with_bodies(
            403,
            404,
            br#"{"error":"access denied"}"#,
            br#"{"error":"not found"}"#,
        );
        let found = extract(&set);
        assert_eq!(found.len(), 1);

        let signal = &found[0];
        for fragment in ["baseline:", "probe:", "access denied", "not found"] {
            assert!(signal.evidence.contains(fragment));
        }
    }

    /// Bodies longer than the preview limit are described by length only.
    #[test]
    fn long_bodies_omit_preview() {
        let long_a = vec![b'a'; 501];
        let long_b = vec![b'b'; 501];
        let set = single_diff_set_with_bodies(403, 403, &long_a, &long_b);
        let found = extract(&set);
        assert_eq!(found.len(), 1);

        let signal = &found[0];
        assert!(!signal.evidence.contains("baseline:"));
    }

    /// Identical bodies with differing `Content-Type` values yield exactly one signal.
    #[test]
    fn different_content_type_produces_signal() {
        // Both sides share status and body; only the header value varies.
        let exchange_with = |content_type: &'static str| {
            let mut exchange = fake_exchange_with_body(403, b"error");
            exchange.response.headers.insert(
                http::header::CONTENT_TYPE,
                http::HeaderValue::from_static(content_type),
            );
            exchange
        };
        let set = DifferentialSet {
            baseline: vec![exchange_with("application/json")],
            probe: vec![exchange_with("text/html")],
            technique: crate::signals::tests::status_code_diff_technique(),
        };
        let found = extract(&set);
        assert_eq!(found.len(), 1);

        let signal = &found[0];
        assert_eq!(signal.kind, SignalKind::BodyDiff);
        for fragment in ["content-type", "application/json", "text/html"] {
            assert!(signal.evidence.contains(fragment));
        }
    }

    /// A set with no exchanges on either side yields nothing.
    #[test]
    fn empty_exchanges_produce_no_signals() {
        let set = crate::signals::tests::diff_set_with_statuses(&[], &[]);
        let found = extract(&set);
        assert!(found.is_empty());
    }

    /// An empty body against a non-empty body counts as a body difference.
    #[test]
    fn one_empty_one_nonempty_produces_signal() {
        let set = single_diff_set_with_bodies(403, 403, b"", b"error body");
        let found = extract(&set);
        assert_eq!(found.len(), 1);

        let signal = &found[0];
        assert_eq!(signal.kind, SignalKind::BodyDiff);
    }
}