parlov-analysis 0.7.0

Analysis engine trait and signal detection for parlov.
Documentation
//! Tests for input-reflection detection in the body signal extractor.
//!
//! When the only differential between baseline and probe response bodies is the
//! request URL identifier echoed back, we emit `SignalKind::InputReflection`
//! *instead of* `SignalKind::BodyDiff`. The `BodyDiff` would otherwise mislead
//! the verdict into a false-positive existence oracle.

use super::*;
use crate::signals::tests::single_diff_set_with_urls_and_bodies;
use parlov_core::SignalKind;

/// Juice Shop fixture: the HTML error page echoes the requested path
/// (`/api/Users/1` vs `/api/Users/9999`) and the bodies differ in nothing else.
#[test]
fn juice_shop_url_path_echo_detected_as_input_reflection() {
    let diff_set = single_diff_set_with_urls_and_bodies(
        "http://localhost:3000/api/Users/1",
        "http://localhost:3000/api/Users/9999",
        200,
        200,
        b"<html><title>Error: Unexpected path: /api/Users/1</title></html>",
        b"<html><title>Error: Unexpected path: /api/Users/9999</title></html>",
    );
    let found = extract(&diff_set);

    // The path echo accounts for the whole diff, so reflection must be the
    // one and only signal reported.
    assert_eq!(
        found.len(),
        1,
        "expected exactly one InputReflection signal, got {signals:?}",
        signals = found,
    );
    let reflection = &found[0];
    assert_eq!(reflection.kind, SignalKind::InputReflection);
    assert!(
        found.iter().all(|s| s.kind != SignalKind::BodyDiff),
        "BodyDiff must be suppressed when reflection explains the diff",
    );

    // Both identifiers are expected to show up in the evidence text.
    let evidence = &reflection.evidence;
    assert!(
        evidence.contains('1'),
        "evidence should mention baseline_id: {ev}",
        ev = evidence
    );
    assert!(
        evidence.contains("9999"),
        "evidence should mention probe_id: {ev}",
        ev = evidence
    );
}

#[test]
fn body_diff_unrelated_to_id_still_emits_body_diff_not_reflection() {
    // The bodies differ in content that has nothing to do with the URL ids.
    let diff_set = single_diff_set_with_urls_and_bodies(
        "http://api.test/users/1",
        "http://api.test/users/2",
        200,
        404,
        b"{\"name\": \"alice\"}",
        b"{\"error\": \"not found\"}",
    );
    let found = extract(&diff_set);
    let has_kind = |wanted: SignalKind| found.iter().any(|s| s.kind == wanted);

    assert!(
        has_kind(SignalKind::BodyDiff),
        "expected BodyDiff for unrelated content diff: {signals:?}",
        signals = found,
    );
    assert!(
        !has_kind(SignalKind::InputReflection),
        "InputReflection must not fire for unrelated diff",
    );
}

#[test]
fn partial_id_echo_with_other_diffs_emits_body_diff() {
    // Besides the echoed ID, the bodies also differ in an unrelated field
    // (`ok` vs `missing`). Reflection cannot account for that remainder, so
    // the pair has to be classified as a BodyDiff.
    let diff_set = single_diff_set_with_urls_and_bodies(
        "http://api.test/x/1",
        "http://api.test/x/9999",
        200,
        200,
        b"id=1, status=ok",
        b"id=9999, status=missing",
    );
    let found = extract(&diff_set);

    let saw_body_diff = found.iter().any(|s| s.kind == SignalKind::BodyDiff);
    assert!(
        saw_body_diff,
        "expected BodyDiff when reflection only partially explains diff: {signals:?}",
        signals = found,
    );

    let no_reflection = found
        .iter()
        .all(|s| s.kind != SignalKind::InputReflection);
    assert!(
        no_reflection,
        "InputReflection must not fire when other diffs remain",
    );
}

/// With identical baseline and probe URLs there is no differing identifier to
/// look for, so a body difference must be reported as a plain `BodyDiff`.
#[test]
fn identical_urls_no_reflection_check() {
    // No way to extract an ID — both URLs identical. Falls through to BodyDiff.
    let ds = single_diff_set_with_urls_and_bodies(
        "http://api.test/static",
        "http://api.test/static",
        200,
        200,
        b"alpha content",
        b"beta content",
    );
    let signals = extract(&ds);

    // Failure messages dump the extracted signals, matching the sibling tests,
    // so a regression here is diagnosable from the test output alone.
    assert!(
        signals.iter().any(|s| s.kind == SignalKind::BodyDiff),
        "identical URLs with differing bodies must emit BodyDiff: {signals:?}",
    );
    assert!(
        !signals
            .iter()
            .any(|s| s.kind == SignalKind::InputReflection),
        "InputReflection must not fire when the URLs are identical: {signals:?}",
    );
}

/// The URL path is echoed twice in each body; every occurrence has to be
/// accounted for so the pair still classifies as a pure reflection.
#[test]
fn id_appearing_multiple_times_in_body_handled() {
    // ID echoed twice in each body. Replace-all must normalize both occurrences.
    let ds = single_diff_set_with_urls_and_bodies(
        "http://localhost:3000/api/Users/1",
        "http://localhost:3000/api/Users/9999",
        200,
        200,
        b"User /api/Users/1 not found at /api/Users/1",
        b"User /api/Users/9999 not found at /api/Users/9999",
    );
    let signals = extract(&ds);

    // Failure messages dump the extracted signals, matching the sibling tests,
    // so a regression here is diagnosable from the test output alone.
    assert_eq!(
        signals.len(),
        1,
        "expected exactly one InputReflection signal, got {signals:?}",
    );
    assert_eq!(
        signals[0].kind,
        SignalKind::InputReflection,
        "repeated path echoes must still classify as InputReflection: {signals:?}",
    );
}

#[test]
fn id_substring_collision_does_not_false_positive() {
    // The one-character IDs (1, 2) occur inside unrelated body text (version
    // numbers, counts), yet neither body contains the URL path itself. Matching
    // is path-anchored (needle = `<anchor>/x/1` etc.), so it finds nothing in
    // either body, detection returns None, and the pair falls through to
    // BodyDiff. That precision is deliberate: a reflection requires the URL
    // path to appear in the response, not merely a bare ID that happens to be
    // substitutable somewhere.
    let diff_set = single_diff_set_with_urls_and_bodies(
        "http://api.test/x/1",
        "http://api.test/x/2",
        200,
        200,
        b"version 1.5.1, count: 100, id 1",
        b"version 2.5.2, count: 200, id 2",
    );
    let found = extract(&diff_set);
    let has_kind = |wanted: SignalKind| found.iter().any(|s| s.kind == wanted);

    assert!(
        has_kind(SignalKind::BodyDiff),
        "bare-ID collisions without path echo must classify as BodyDiff: {signals:?}",
        signals = found,
    );
    assert!(
        !has_kind(SignalKind::InputReflection),
        "InputReflection requires the URL path to appear in the body",
    );
}

#[test]
fn non_utf8_bodies_with_url_diff_skips_reflection_check() {
    // The reflection check works by UTF-8 substring replacement, so binary
    // bodies bypass it and take the pre-existing BodyDiff path.
    let diff_set = single_diff_set_with_urls_and_bodies(
        "http://api.test/x/1",
        "http://api.test/x/2",
        200,
        200,
        &[0xFF, 0xFE, 0xFD],
        &[0xFF, 0xFE, 0xFC],
    );
    let found = extract(&diff_set);

    let saw_body_diff = found.iter().any(|s| s.kind == SignalKind::BodyDiff);
    assert!(
        saw_body_diff,
        "non-UTF-8 bodies must fall through to BodyDiff: {signals:?}",
        signals = found,
    );

    let no_reflection = found
        .iter()
        .all(|s| s.kind != SignalKind::InputReflection);
    assert!(no_reflection);
}

#[test]
fn equal_bodies_no_signal_at_all() {
    // Regression guard: byte-identical bodies must yield neither BodyDiff nor
    // InputReflection, even though the two URLs differ.
    let diff_set = single_diff_set_with_urls_and_bodies(
        "http://api.test/x/1",
        "http://api.test/x/2",
        200,
        200,
        b"identical content",
        b"identical content",
    );
    let found = extract(&diff_set);
    let has_kind = |wanted: SignalKind| found.iter().any(|s| s.kind == wanted);

    assert!(
        !has_kind(SignalKind::BodyDiff),
        "identical bodies must not emit BodyDiff: {signals:?}",
        signals = found,
    );
    assert!(
        !has_kind(SignalKind::InputReflection),
        "identical bodies must not emit InputReflection: {signals:?}",
        signals = found,
    );
}