parlov-analysis 0.7.0

Analysis engine trait and signal detection for parlov.
Documentation
use parlov_core::{DifferentialSet, Signal, SignalKind};

#[path = "body_reflection.rs"]
mod body_reflection;
use body_reflection::detect_input_reflection;

/// Per-body byte cap for the content preview included in evidence.
///
/// `body_preview` renders UTF-8 bodies of at most this many bytes in full and
/// truncates longer ones down to (at most) this many bytes.
const MAX_PREVIEW_LEN: usize = 500;

/// Appends body-differential signals for `data` to `out`.
///
/// Only the last entry of `data.baseline` and the last entry of `data.probe` are
/// examined; if either list is empty nothing is pushed.
///
/// Two checks run, in this order:
/// 1. The response bodies are compared, yielding either an `InputReflection` signal
///    (the difference is attributed to the request URL being echoed back) or a
///    `BodyDiff` signal. Identical bodies yield nothing.
/// 2. The `Content-Type` response headers are compared; when both are present and
///    differ, an additional `BodyDiff` signal is pushed.
pub fn extract_into(data: &DifferentialSet, out: &mut Vec<Signal>) {
    // Both sides are required; bail out as soon as one of them is missing.
    let (Some(baseline), Some(probe)) = (data.baseline.last(), data.probe.last()) else {
        return;
    };

    let (baseline_response, probe_response) = (&baseline.response, &probe.response);

    extract_body_or_reflection(
        &baseline.request.url,
        &probe.request.url,
        &baseline_response.body,
        &probe_response.body,
        out,
    );
    extract_content_type_diff(&baseline_response.headers, &probe_response.headers, out);
}

/// Convenience wrapper around [`extract_into`] that collects the signals into a
/// newly allocated `Vec<Signal>` and returns it.
///
/// The returned vector is empty when [`extract_into`] pushes nothing.
#[must_use]
pub fn extract(data: &DifferentialSet) -> Vec<Signal> {
    let mut signals: Vec<Signal> = Vec::new();
    extract_into(data, &mut signals);
    signals
}

/// Emits at most one signal describing how a baseline/probe body pair differs.
///
/// * Byte-identical bodies: nothing is pushed.
/// * `detect_input_reflection` returns evidence: a single `InputReflection` signal is
///   pushed and no `BodyDiff` is emitted — the difference is just the URL being
///   echoed, not real evidence of an existence oracle.
/// * Otherwise: a `BodyDiff` signal carrying the output of `build_body_evidence`.
fn extract_body_or_reflection(
    baseline_url: &str,
    probe_url: &str,
    baseline_body: &[u8],
    probe_body: &[u8],
    signals: &mut Vec<Signal>,
) {
    if baseline_body == probe_body {
        return;
    }

    // Reflection takes precedence; the generic diff evidence is only built when
    // reflection detection comes back empty.
    let (kind, evidence) =
        match detect_input_reflection(baseline_url, probe_url, baseline_body, probe_body) {
            Some(reflection) => (SignalKind::InputReflection, reflection),
            None => (
                SignalKind::BodyDiff,
                build_body_evidence(baseline_body, probe_body),
            ),
        };

    signals.push(Signal {
        kind,
        evidence,
        rfc_basis: None,
    });
}

/// Builds the evidence string for a `BodyDiff` signal.
///
/// The result is a `"; "`-separated list: first a length summary (either the shared
/// length, or both lengths when they differ), followed by whatever
/// `append_content_preview` contributes.
fn build_body_evidence(baseline: &[u8], probe: &[u8]) -> String {
    let (baseline_len, probe_len) = (baseline.len(), probe.len());

    let length_summary = if baseline_len == probe_len {
        format!("body content differs (same length: {baseline_len} bytes)")
    } else {
        format!("body length: {baseline_len} (baseline) vs {probe_len} (probe)")
    };

    let mut segments = vec![length_summary];
    append_content_preview(baseline, probe, &mut segments);
    segments.join("; ")
}

/// Pushes one `"baseline: …, probe: …"` entry onto `parts`.
///
/// Each side is rendered independently by `body_preview`, so an entry is always
/// appended: oversized bodies show up truncated and non-UTF-8 bodies show up as a
/// byte-count placeholder rather than being dropped.
fn append_content_preview(baseline: &[u8], probe: &[u8], parts: &mut Vec<String>) {
    parts.push(format!(
        "baseline: {}, probe: {}",
        body_preview(baseline),
        body_preview(probe)
    ));
}

/// Produces a human-readable preview of a single body.
///
/// * Not valid UTF-8: `<N bytes, non-text>`.
/// * Valid UTF-8 of at most `MAX_PREVIEW_LEN` bytes: the text itself, unchanged.
/// * Longer valid UTF-8: a prefix cut on a codepoint boundary, followed by a
///   marker carrying the body's total byte length.
fn body_preview(body: &[u8]) -> String {
    let Ok(text) = std::str::from_utf8(body) else {
        return format!("<{} bytes, non-text>", body.len());
    };

    if text.len() <= MAX_PREVIEW_LEN {
        return text.to_owned();
    }

    let head = utf8_safe_truncate(text, MAX_PREVIEW_LEN);
    format!("{head}… (truncated, total {}b)", body.len())
}

/// Returns the longest prefix of `s` that is no longer than `max_bytes` and ends
/// on a UTF-8 codepoint boundary.
///
/// When `max_bytes` lands inside a multi-byte codepoint the cut moves backwards
/// to the start of that codepoint, so the result may be shorter than `max_bytes`
/// (down to the empty string).
fn utf8_safe_truncate(s: &str, max_bytes: usize) -> &str {
    let limit = s.len().min(max_bytes);
    // Index 0 is always a boundary, so the search cannot come back empty; the
    // fallback only exists to avoid an `unwrap`.
    let cut = (0..=limit)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}

/// Pushes a `BodyDiff` signal when the two responses carry different
/// `content-type` header values.
///
/// Nothing is pushed unless both header maps yield a value; a value for which
/// `to_str()` fails is treated the same as a missing header. The values are
/// compared as exact strings.
fn extract_content_type_diff(
    baseline_headers: &http::HeaderMap,
    probe_headers: &http::HeaderMap,
    signals: &mut Vec<Signal>,
) {
    /// Looks up the `content-type` header as a string slice, if available.
    fn content_type(headers: &http::HeaderMap) -> Option<&str> {
        headers
            .get("content-type")
            .and_then(|value| value.to_str().ok())
    }

    let (Some(baseline_ct), Some(probe_ct)) =
        (content_type(baseline_headers), content_type(probe_headers))
    else {
        return;
    };

    if baseline_ct == probe_ct {
        return;
    }

    signals.push(Signal {
        kind: SignalKind::BodyDiff,
        evidence: format!("content-type: {baseline_ct} (baseline) vs {probe_ct} (probe)"),
        rfc_basis: None,
    });
}

#[cfg(test)]
#[path = "body_tests.rs"]
mod tests;

#[cfg(test)]
#[path = "body_reflection_tests.rs"]
mod reflection_tests;