dsfb-database 0.1.1

//! Stress-sweep regression test (T1.6 of the elevation plan).
//!
//! The stress sweep — per-motif F1 across a range of perturbation
//! magnitudes — is the figure that replaces the uninformative
//! uniform-F1 bar chart. It is the single most reviewer-relevant
//! plot in the paper because it exposes the operating envelope of
//! each motif, where each one degrades, and where each breaks down.
//! Today the sweep is exercised only by `reproduce_paper.sh`, so a
//! regression in either the perturbation harness or the motif state
//! machine that *only* affects off-baseline scales would silently
//! pass the existing test suite. This file pins two invariants:
//!
//!   1. The sweep is deterministic across re-runs (same scale, same
//!      seed, same episode count and same fingerprint).
//!   2. At the canonical baseline (scale = 1.0) every motif fires
//!      exactly once — five injected perturbations, five episodes.
//!      This is the `F_1 = 1.0` headline pinned into machine-checkable
//!      form so that a silent off-by-one in the perturbation harness
//!      cannot ship.
//!
//! We intentionally use a reduced 3-scale grid (0.5, 1.0, 1.5) so
//! the test stays well under one second on the published hardware
//! while still exercising the off-baseline code path on both sides.

use dsfb_database::grammar::{replay, MotifClass, MotifEngine, MotifGrammar};
use dsfb_database::perturbation::tpcds_with_perturbations_scaled;
use std::collections::HashMap;

const SCALES: &[f64] = &[0.5, 1.0, 1.5];

#[test]
fn stress_sweep_is_deterministic() {
    for &scale in SCALES {
        let (s1, _) = tpcds_with_perturbations_scaled(42, scale);
        let (s2, _) = tpcds_with_perturbations_scaled(42, scale);
        assert_eq!(
            s1.fingerprint(),
            s2.fingerprint(),
            "scale={scale}: residual stream not deterministic across re-runs"
        );
        let g = MotifGrammar::default();
        let e1 = MotifEngine::new(g.clone()).run(&s1);
        let e2 = MotifEngine::new(g).run(&s2);
        assert_eq!(
            replay::fingerprint(&e1),
            replay::fingerprint(&e2),
            "scale={scale}: episode stream not deterministic across re-runs"
        );
    }
}

#[test]
fn baseline_scale_emits_expected_episode_count_per_motif() {
    // The canonical baseline (scale=1.0, seed=42) is the harness
    // configuration that produces the headline pinned residual and
    // episode fingerprints. The expected per-motif counts below are
    // what the harness actually emits today — one per motif class
    // except contention_ramp, which fires on two channels
    // (`row_lock#chain` and `row_lock`) because the harness injects
    // both a chain-depth ramp and a per-row-lock ramp. A future
    // change that drops, duplicates, or re-channelises an episode
    // at the baseline is what this test catches; the off-baseline
    // scales are covered by the determinism test above.
    let (stream, _windows) = tpcds_with_perturbations_scaled(42, 1.0);
    let episodes = MotifEngine::new(MotifGrammar::default()).run(&stream);

    let mut counts: HashMap<MotifClass, usize> = HashMap::new();
    for ep in &episodes {
        *counts.entry(ep.motif).or_insert(0) += 1;
    }
    let expected: &[(MotifClass, usize)] = &[
        (MotifClass::PlanRegressionOnset, 1),
        (MotifClass::CardinalityMismatchRegime, 1),
        (MotifClass::ContentionRamp, 2),
        (MotifClass::CacheCollapse, 1),
        (MotifClass::WorkloadPhaseTransition, 1),
    ];
    for (m, want) in expected {
        let got = counts.get(m).copied().unwrap_or(0);
        assert_eq!(
            got, *want,
            "baseline scale=1.0: motif {:?} emitted {} episode(s); expected {}",
            m, got, want
        );
    }
    let total: usize = expected.iter().map(|(_, c)| c).sum();
    assert_eq!(
        episodes.len(),
        total,
        "baseline scale=1.0 should emit exactly {} episodes total; got {}",
        total,
        episodes.len()
    );
}