// ktstr 0.16.0 - Docs.rs

//! Stimulus/phase correlation for scenario execution.
//!
//! Correlates [`StimulusEvent`]s (cgroup operations, cpuset changes)
//! with `MonitorSample` windows to
//! measure per-phase scheduler behavior degradation. Produces
//! [`Timeline`] entries consumed by the stats and reporting pipeline.

use std::fmt;

use crate::monitor::{MonitorSample, sample_looks_valid};

// ---------------------------------------------------------------------------
// TimelineContext — system context rendered as a header
// ---------------------------------------------------------------------------

/// System context for a timeline, rendered as a header block.
///
/// Every field is optional. `Timeline::format_with_context` emits one
/// `name: value` entry per populated field, in declaration order, and
/// omits the header line entirely when all fields are `None` (which is
/// what `TimelineContext::default()` yields).
#[derive(Debug, Clone, Default)]
pub struct TimelineContext {
    /// Kernel version string (e.g. "6.14.0-rc3+"). Rendered as `kernel: …`.
    pub kernel: Option<String>,
    /// Topology description (e.g. "2n4l4c2t (16 cpus)"). Rendered as
    /// `topology: …`.
    pub topology: Option<String>,
    /// Scheduler name (e.g. "scx_mitosis"). Rendered as `scheduler: …`.
    pub scheduler: Option<String>,
    /// Scenario name. Rendered as `scenario: …`.
    pub scenario: Option<String>,
    /// Total run duration in seconds. Rendered as `duration: …s` with one
    /// decimal place.
    pub duration_s: Option<f64>,
}

// ---------------------------------------------------------------------------
// StimulusEvent — what happened and when
// ---------------------------------------------------------------------------

/// A discrete event during scenario execution that may cause observable
/// changes in scheduler behavior. Generated by step executors on the guest
/// side and carried in the VM output alongside monitor samples.
///
/// Three constructors build the event kinds seen in this module:
/// [`Self::from_wire`] (a step start), [`Self::from_step_end`] (a step's
/// end-of-hold marker, `is_step_end == true`) and [`Self::terminal`] (the
/// scenario-end boundary, `is_terminal == true`). The two flags are never
/// both set by those constructors.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct StimulusEvent {
    /// Milliseconds since scenario start (guest monotonic clock).
    pub elapsed_ms: u64,
    /// Human-readable label. Produced as `"StepStart[k]"` by
    /// [`Self::from_wire`] and `"StepEnd[k]"` by [`Self::from_step_end`]
    /// (`k` is the 0-indexed scenario Step ordinal, i.e. the wire
    /// `step_index` minus one, saturating at 0), `"ScenarioEnd"` by
    /// [`Self::terminal`], and the `"BASELINE"`/`"Step[k]"` bucket label
    /// by the `phase_from_bucket` placeholder. Test fixtures may carry any
    /// label.
    pub label: String,
    /// What kind of operation triggered this event. The wire-backed
    /// constructors ([`Self::from_wire`], [`Self::from_step_end`]) render
    /// it as `"ops=N"` from the wire `op_count`; [`Self::terminal`] leaves
    /// it `None`.
    pub op_kind: Option<String>,
    /// Additional context (e.g. "4 cpus", "cgroup=cg_0"). The wire-backed
    /// constructors render it as `"N cgroups, M workers"`;
    /// [`Self::terminal`] leaves it `None`.
    pub detail: Option<String>,
    /// Cumulative worker iterations at this event. `Some(_)` for every
    /// event built from the wire (the wire counter is always present —
    /// see [`Self::from_wire`]); a cumulative counter for which
    /// `Some(0)` is a legitimate "no iterations accumulated yet"
    /// baseline, NOT a missing sample. `None` only for synthetic /
    /// placeholder events that carry no counter (the
    /// `phase_from_bucket` fallback and test fixtures). Used to
    /// compute per-phase throughput (iterations/s) as the delta
    /// between consecutive events.
    ///
    /// SEMANTICS: this is the sum of the iteration counters of the
    /// worker handles ALIVE at the event instant (step-local +
    /// Backdrop). Each step emits BOTH a StepStart event (counter at the
    /// step's start) and a StepEnd event ([`Self::is_step_end`], counter
    /// at the step's end-of-hold), so the per-phase iteration_rate is the
    /// STEP-LOCAL delta `StepEnd[k] - StepStart[k]` — each step's OWN
    /// workers measured start-to-end. That works for workers respawned
    /// per step (the cross-step `StepStart[k+1] - StepStart[k]` delta
    /// reads fresh~0 - fresh~0 and is dropped) AND is more accurate for
    /// persistent (Backdrop) workers (it excludes the inter-step
    /// teardown/respawn wall-time the cross-step window spanned). Bucket
    /// `k` is sourced ONLY by its `StepStart[k] -> StepEnd[k]` pair: the
    /// `iteration_rate` attribution loop in
    /// [`crate::assert::build_phase_buckets_with_stimulus`] skips any
    /// `is_step_end` `prev`, so a stalled step whose step-local delta is
    /// zero (`StepEnd[k] == StepStart[k]`) reports its MEASURED-ZERO rate
    /// `Some(0.0)` (see `Self::rate_to`) rather than leaking the
    /// inter-step gap rate from the `StepEnd[k] -> StepStart[k+1]` pair.
    /// The monitor-only
    /// [`Timeline::build`] fallback (no snapshot captures) computes the
    /// SAME step-local `StepStart[k] -> StepEnd[k]` rate — the StepEnd
    /// events reach it too (they are emitted independent of captures) — and
    /// falls back to cross-step (or the terminal for the last step) only
    /// when a step has no StepEnd (sched-died / legacy data); StepEnd is
    /// filtered only from that path's phase LAYOUT, not its rate.
    pub total_iterations: Option<u64>,
    /// 1-indexed scenario step this event belongs to (the same
    /// encoding the bridge stamps: `1..=N` for Step ordinals), or
    /// `None` for non-step events (including the terminal scenario-end
    /// boundary; see `is_terminal`). Carried explicitly from the wire
    /// `StimulusPayload.step_index` so the periodic-capture phase
    /// attribution can map a capture's workload-relative boundary
    /// offset onto the guest's own step timeline without parsing the
    /// human-readable `label`. [`Timeline::build`] also uses it to look
    /// up a step's own `StepEnd` event.
    pub step_index: Option<u16>,
    /// True only for the synthetic scenario-end boundary the eval
    /// walker appends from the `ScenarioEnd` wire frame's final
    /// `total_iterations`. On a CLEAN run the last step emits its own
    /// `StepEnd[N]`, which supplies that step's `iteration_rate` right
    /// boundary in BOTH rate consumers — the snapshot path
    /// ([`crate::assert::build_phase_buckets_with_stimulus`], the
    /// `StepStart[N]` -> `StepEnd[N]` pair) and the monitor-only
    /// [`Timeline::build`] fallback (which looks up each step's `StepEnd`
    /// by `step_index`) — and the terminal is then NOT consumed for a
    /// rate: the snapshot path's attribution loop skips the
    /// `(StepEnd[N], terminal)` pair via its `is_step_end` guard (before
    /// `rate_to` is reached), and `Timeline::build` reaches for the
    /// terminal only when a step's `StepEnd` lookup misses. The terminal
    /// is consumed as a step's rate boundary ONLY for legacy/synthetic
    /// data that carries a `ScenarioEnd` frame but no `StepEnd` frames
    /// (fresh guest output always pairs them). A sched-died step is NOT
    /// such a case: its early return skips BOTH the `StepEnd` emission AND
    /// `send_scenario_end`, so neither frame exists and the dead step
    /// reports no rate via the no-successor path. It is NOT a step start:
    /// `step_index` is `None` so it seeds no [`crate::assert::PhaseBucket`]
    /// (excluded from the step-start timeline), and [`Timeline::build`]
    /// skips it when laying out phase boundaries so it never renders a
    /// phantom trailing phase.
    pub is_terminal: bool,
    /// True for a per-step END event (decoded from a
    /// `crate::vmm::wire::MsgType::StepEnd` frame via
    /// [`Self::from_step_end`]). It carries the SAME 1-indexed
    /// `step_index` as its StepStart and its step's end-of-hold
    /// `total_iterations`, so [`crate::assert::build_phase_buckets_with_stimulus`]'s
    /// elapsed-sorted `windows(2)` pairs `StepStart[k]` -> `StepEnd[k]`
    /// first and `or_insert` keeps that step-local rate. NOT a step
    /// start, so [`Timeline::build`] (the monitor-only fallback's
    /// index-based cross-step pairing) filters it out of its step-start
    /// list to avoid a phantom phase.
    pub is_step_end: bool,
}

impl StimulusEvent {
    /// Shared wire→timeline mapping behind [`Self::from_wire`] and
    /// [`Self::from_step_end`]. The two event kinds differ only in their
    /// label prefix and in the `is_step_end` flag; everything else is
    /// copied from the wire frame the same way.
    ///
    /// The label shows the 0-indexed step ordinal (`step_index - 1`,
    /// saturating at 0) while the `step_index` field keeps the 1-indexed
    /// wire value. `total_iterations` is always wrapped in `Some`, so a
    /// wire value of `0` stays a real baseline reading instead of
    /// collapsing to "no sample".
    fn from_step_frame(
        ev: &crate::vmm::wire::StimulusEvent,
        label_prefix: &str,
        is_step_end: bool,
    ) -> Self {
        let ordinal = ev.step_index.saturating_sub(1);
        let op_kind = format!("ops={}", ev.op_count);
        let detail = format!("{} cgroups, {} workers", ev.cgroup_count, ev.worker_count);
        Self {
            elapsed_ms: ev.elapsed_ms as u64,
            label: format!("{label_prefix}[{ordinal}]"),
            op_kind: Some(op_kind),
            detail: Some(detail),
            total_iterations: Some(ev.total_iterations),
            step_index: Some(ev.step_index),
            is_terminal: false,
            is_step_end,
        }
    }

    /// Build a step-START timeline event from a deserialized wire
    /// stimulus event.
    ///
    /// This is the single wire→timeline mapping, so the production eval
    /// path (`evaluate_vm_result`) and out-of-tree consumers folding
    /// `VmResult::stimulus_timeline()` through
    /// [`crate::assert::build_phase_buckets_with_stimulus`] see identical
    /// events.
    ///
    /// * `label` is `"StepStart[k]"` where `k` is the 0-indexed
    ///   Scenario-Step ordinal (`step_index - 1`), matching the
    ///   `PhaseBucket` `Step[k]` labels.
    /// * `step_index` keeps the 1-indexed wire value (the bridge
    ///   convention: `Step[k]` -> `k + 1`, BASELINE owns 0) for
    ///   phase-bucket remap.
    /// * `total_iterations` is carried verbatim as `Some(_)`. The wire
    ///   counter is cumulative and always populated, so `0` is a
    ///   legitimate reading — the first step's frame fires right after
    ///   its workers spawn and genuinely reads ~0. Mapping that `0` to
    ///   `None` would make the first delta pair fail the `Some`/`Some`
    ///   guard in the rate consumers and silently drop the first step's
    ///   `iteration_rate`.
    /// * `is_terminal` and `is_step_end` are both `false`.
    pub fn from_wire(ev: &crate::vmm::wire::StimulusEvent) -> Self {
        Self::from_step_frame(ev, "StepStart", false)
    }

    /// Build a per-step END event from a
    /// `crate::vmm::wire::MsgType::StepEnd` frame, which reuses the
    /// `crate::vmm::wire::StimulusEvent` wire body.
    ///
    /// The result mirrors [`Self::from_wire`] except that the label is
    /// `"StepEnd[k]"` and `is_step_end` is `true`. It carries the same
    /// 1-indexed `step_index` as the step's StepStart plus the step's
    /// end-of-hold `total_iterations`. Sorted by elapsed time a step's
    /// events order `StepStart[k]` < `StepEnd[k]` < `StepStart[k+1]`, so
    /// [`crate::assert::build_phase_buckets_with_stimulus`] pairs
    /// `StepStart[k]` -> `StepEnd[k]` first and keeps that step-local
    /// rate. `is_terminal` is `false`: this is a real per-step boundary,
    /// not the scenario-end terminal.
    pub fn from_step_end(ev: &crate::vmm::wire::StimulusEvent) -> Self {
        Self::from_step_frame(ev, "StepEnd", true)
    }

    /// Build the synthetic terminal boundary event from the `ScenarioEnd`
    /// wire frame's final cumulative `total_iterations` and
    /// scenario-relative `elapsed_ms`. It is appended once, after every
    /// per-step event.
    ///
    /// The event is labelled `"ScenarioEnd"`, has no `op_kind`/`detail`,
    /// and has `step_index == None` with `is_terminal == true`: it is not
    /// a step start, seeds no [`crate::assert::PhaseBucket`], and
    /// [`Timeline::build`] treats it as a right boundary only, never as a
    /// phase. On a clean run `StepEnd[N]` supplies the last step's rate
    /// boundary and the terminal is not consumed for a rate; it is used as
    /// a step's boundary only for legacy/synthetic data that has a
    /// `ScenarioEnd` frame but no `StepEnd` frames (a sched-died step has
    /// neither, since its early return skips both emissions) — see the
    /// [`Self::is_terminal`] field doc. `elapsed_ms` is in the same
    /// guest-monotonic frame as the step events, so the last-step duration
    /// is well-formed.
    pub fn terminal(elapsed_ms: u64, total_iterations: u64) -> Self {
        Self {
            elapsed_ms,
            label: String::from("ScenarioEnd"),
            op_kind: None,
            detail: None,
            total_iterations: Some(total_iterations),
            step_index: None,
            is_terminal: true,
            is_step_end: false,
        }
    }

    /// Iterations per second from this event to `next`: the
    /// `total_iterations` delta divided by the guest-clock `elapsed_ms`
    /// delta (converted to seconds).
    ///
    /// Returns `None` only when the measurement is undefined:
    /// * either event has no `total_iterations` sample,
    /// * the counter went BACKWARD (`next < self`, a worker-population
    ///   reset — unmeasurable, not zero), or
    /// * the window is zero-length (which includes `next` not being later
    ///   than `self`, because the subtraction saturates at 0).
    ///
    /// MEASURED ZERO is distinct from not-measured: equal counters over a
    /// positive window return `Some(0.0)`. Zero throughput is a real value
    /// and the strongest degradation signal, so it has to surface — with
    /// `Some(0.0)` the throughput detector in [`Timeline::build`] /
    /// [`Timeline::from_phase_buckets`] sees a relative delta of `-1.0`
    /// when the prior phase had a positive rate. (A prior phase that was
    /// already zero is skipped by the detector's `before > 0.0` gate.)
    ///
    /// This is the single iteration_rate formula shared by
    /// [`crate::assert::build_phase_buckets_with_stimulus`] and
    /// [`Timeline::build`]; they pair events differently but must compute
    /// the rate identically.
    pub(crate) fn rate_to(&self, next: &StimulusEvent) -> Option<f64> {
        let (from, to) = self.total_iterations.zip(next.total_iterations)?;
        // `checked_sub` yields `None` exactly when the counter regressed.
        let iterations = to.checked_sub(from)?;
        match next.elapsed_ms.saturating_sub(self.elapsed_ms) {
            0 => None,
            window_ms => Some(iterations as f64 / (window_ms as f64 / 1000.0)),
        }
    }
}

// ---------------------------------------------------------------------------
// Phase — a time window between consecutive stimulus events
// ---------------------------------------------------------------------------

/// Metrics aggregated from monitor samples within a phase.
///
/// `Default` yields zero counts and `None` for every optional metric.
#[derive(Debug, Clone, Default)]
pub struct PhaseMetrics {
    /// Number of samples behind these metrics. The renderer prints it in
    /// the phase header and shows `[no samples]` when it is 0, and
    /// `Timeline::build` only runs change detection between two phases
    /// that both have a non-zero count. Presumably the count of valid
    /// samples that fell inside the phase window — confirm against
    /// `compute_metrics`.
    pub sample_count: usize,
    /// Mean CPU-imbalance ratio over the phase's valid samples. `None`
    /// when the phase had no valid samples (monitor-only `Timeline::build`)
    /// or its source bucket carried no `avg_imbalance_ratio` metric
    /// (snapshot `from_phase_buckets`) — distinct from a real `Some(0.0)`
    /// (perfectly balanced). The change detector compares it only when
    /// both sides are `Some`, so an absent phase never reads as a false
    /// zero-imbalance.
    pub avg_imbalance: Option<f64>,
    /// Peak CPU-imbalance ratio over the phase's valid samples. `None` on
    /// the same no-data conditions as [`Self::avg_imbalance`].
    pub max_imbalance: Option<f64>,
    /// Mean local-DSQ depth over the phase's valid samples. `None` on the
    /// same no-data conditions as [`Self::avg_imbalance`].
    pub avg_dsq_depth: Option<f64>,
    /// Peak DSQ depth for the phase; rendered as the `max=` value of the
    /// `dsq:` group. Not optional, so a phase without data reads 0.
    pub max_dsq_depth: u32,
    /// Stall count for the phase; rendered on its own `stalls:` line only
    /// when greater than 0.
    pub stall_count: usize,
    /// select_cpu_fallback events per second. None when event counters unavailable.
    pub fallback_rate: Option<f64>,
    /// dispatch_keep_last events per second. None when event counters unavailable.
    pub keep_last_rate: Option<f64>,
    /// Worker iterations per second during this phase. Computed from
    /// cumulative iteration counts in consecutive stimulus events.
    /// `Some(0.0)` is a measured zero; `None` means no rate could be
    /// computed.
    pub iteration_rate: Option<f64>,
}

/// Whether a metric got better or worse across a phase boundary.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeDirection {
    /// The metric moved in the favourable direction.
    Improved,
    /// The metric moved in the unfavourable direction.
    Degraded,
}

impl fmt::Display for ChangeDirection {
    /// Writes the upper-case report keyword for the direction:
    /// `IMPROVEMENT` or `DEGRADATION`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keyword = match *self {
            Self::Improved => "IMPROVEMENT",
            Self::Degraded => "DEGRADATION",
        };
        f.write_str(keyword)
    }
}

/// Detected change at a stimulus boundary.
///
/// One record is produced per metric whose value moved past its threshold
/// between the phase before the boundary and the phase after it.
#[derive(Debug, Clone)]
pub struct PhaseChange {
    /// Whether the move was an improvement or a degradation.
    pub direction: ChangeDirection,
    /// Name of the metric that changed. `Timeline::build` emits
    /// `"imbalance"`, `"dsq_depth"`, `"fallback"`, `"keep_last"` and
    /// `"throughput"`.
    pub metric: String,
    /// Metric value in the phase preceding the boundary.
    pub before: f64,
    /// Metric value in the phase following the boundary.
    pub after: f64,
}

/// A time window between two consecutive stimulus events.
#[derive(Debug, Clone)]
pub struct Phase {
    /// Dense 0-based position of the phase in the timeline. The renderer
    /// treats index 0 as the `BASELINE` settle window.
    pub index: usize,
    /// Window start in milliseconds (inclusive). As produced by
    /// `Timeline::build` this is on the monitor time axis: the stimulus
    /// timestamp shifted by the clock-alignment offset and clamped at 0.
    pub start_ms: u64,
    /// Window end in milliseconds (exclusive when samples are assigned).
    /// In `Timeline::build` the last phase ends one millisecond past the
    /// final monitor sample so that sample is still included.
    pub end_ms: u64,
    /// The stimulus event that starts this phase (None for the initial phase).
    pub stimulus: Option<StimulusEvent>,
    /// Metrics aggregated over the phase.
    pub metrics: PhaseMetrics,
    /// Changes detected at this phase's stimulus boundary.
    pub changes: Vec<PhaseChange>,
}

// ---------------------------------------------------------------------------
// Timeline
// ---------------------------------------------------------------------------

/// Correlated timeline of stimulus events and monitor observations.
#[derive(Debug, Clone)]
pub struct Timeline {
    /// Phases in chronological order. Empty when there was nothing to
    /// correlate, in which case the timeline renders as an empty string.
    pub phases: Vec<Phase>,
}

/// Absolute delta in imbalance ratio that must be strictly exceeded to
/// flag a change (avoids noise). A delta exactly equal to the threshold is
/// not flagged.
const IMBALANCE_THRESHOLD: f64 = 0.5;
/// Absolute delta in DSQ depth that must be strictly exceeded to flag a
/// change.
const DSQ_THRESHOLD: f64 = 3.0;
/// Absolute delta in fallback rate (events/s) that must be strictly
/// exceeded to flag a change.
const FALLBACK_RATE_THRESHOLD: f64 = 10.0;
/// Absolute delta in keep_last rate (events/s) that must be strictly
/// exceeded to flag a change.
const KEEP_LAST_RATE_THRESHOLD: f64 = 10.0;
/// Relative change in iteration rate that must be strictly exceeded to
/// flag a throughput change. 0.3 = 30% drop or increase, measured against
/// the earlier phase's rate.
const ITERATION_RATE_REL_THRESHOLD: f64 = 0.3;

/// Compare a metric across a phase boundary and report a [`PhaseChange`]
/// when it moved by more than `threshold`.
///
/// * `before` / `after` — metric value on each side of the boundary.
/// * `threshold` — absolute delta that must be strictly exceeded; a delta
///   whose magnitude is less than or equal to it yields `None`.
/// * `metric` — name copied into the resulting record.
/// * `higher_is_worse` — when `true` an increase is `Degraded`; when
///   `false` a decrease is `Degraded`. Any other flagged move is
///   `Improved`.
fn detect_change(
    before: f64,
    after: f64,
    threshold: f64,
    metric: &str,
    higher_is_worse: bool,
) -> Option<PhaseChange> {
    let shift = after - before;
    // Written as `<=` rather than a negated `>` so the comparison behaves
    // the same for every float input, NaN included.
    if shift.abs() <= threshold {
        return None;
    }

    let worsened = (higher_is_worse && shift > 0.0) || (!higher_is_worse && shift < 0.0);
    let direction = match worsened {
        true => ChangeDirection::Degraded,
        false => ChangeDirection::Improved,
    };

    Some(PhaseChange {
        direction,
        metric: metric.to_owned(),
        before,
        after,
    })
}

impl Timeline {
    /// Build a timeline from stimulus events and monitor samples.
    ///
    /// Clock alignment: stimulus events use guest monotonic time (ms since
    /// scenario start). Monitor samples use host monotonic time (ms since
    /// VM boot). The first stimulus event's timestamp and the first
    /// non-trivial monitor sample (after 500ms warmup) approximately
    /// coincide. We compute an offset to align them.
    ///
    /// Returns an empty timeline if either input is empty.
    /// Build a Timeline from stimulus events + raw monitor
    /// samples via the per-window `compute_metrics` reduction.
    /// The production success path uses [`Self::from_phase_buckets`]
    /// (which folds pre-bucketed PhaseBuckets) ; `build` is the
    /// fallback evaluate_vm_result takes for monitor-only runs
    /// (no snapshot bridge captures → PhaseBuckets vec is empty
    /// but monitor samples exist) so the failure-message
    /// timeline still renders. Both entry points produce the
    /// same Timeline field shape; from_phase_buckets is
    /// preferred when buckets are available because it avoids
    /// the per-MonitorSample reduction.
    pub fn build(stimulus_events: &[StimulusEvent], monitor_samples: &[MonitorSample]) -> Self {
        if stimulus_events.is_empty() || monitor_samples.is_empty() {
            return Self { phases: Vec::new() };
        }

        let mut events = stimulus_events.to_vec();
        events.sort_by_key(|e| e.elapsed_ms);

        // Clock alignment: find the offset between guest stimulus time
        // and host monitor time. The first stimulus event (ScenarioStart)
        // and the first monitor sample with plausible data roughly coincide.
        let first_stimulus_ms = events[0].elapsed_ms;
        let first_monitor_ms = monitor_samples
            .iter()
            .find(|s| s.elapsed_ms > 500 && !s.cpus.is_empty())
            .map(|s| s.elapsed_ms)
            .unwrap_or_else(|| monitor_samples.first().map(|s| s.elapsed_ms).unwrap_or(0));

        // offset: add this to a stimulus timestamp to get monitor time
        let offset = first_monitor_ms as i64 - first_stimulus_ms as i64;

        // Define phase boundaries from consecutive stimulus events.
        // Each pair (events[i], events[i+1]) bounds a phase.
        // The last event to end-of-data is also a phase.
        let last_monitor_ms = monitor_samples.last().map(|s| s.elapsed_ms).unwrap_or(0);

        // The terminal scenario-end event is a rate right
        // boundary ONLY — it seeds no phase. Extract it explicitly
        // rather than relying on it sorting last for positional
        // alignment: a corrupt / out-of-order step `elapsed_ms` (a u32
        // read off the wire) could otherwise shift it into the middle
        // of `events` and misalign the dense phase index against the
        // step events. `step_events` is the phase-bearing set.
        let terminal: Option<&StimulusEvent> = events.iter().find(|e| e.is_terminal);
        // StepStart events only — the PHASE-LAYOUT set. Per-step StepEnd
        // events are excluded here because a StepEnd seeds no new phase
        // (it is an end-of-hold marker, not a step boundary); including
        // them would produce a phantom extra phase and misalign the dense
        // phase index. StepEnd events are NOT discarded, though: the
        // step-local iteration_rate loop below pairs each StepStart[k]
        // with its own StepEnd[k] (looked up by step_index in the full
        // `events` vec), matching build_phase_buckets_with_stimulus. The
        // dense-index cross-step pairing is kept only as a fallback for
        // steps that have no StepEnd (a sched-died step, or legacy data
        // predating the StepEnd frame).
        let step_events: Vec<&StimulusEvent> = events
            .iter()
            .filter(|e| !e.is_terminal && !e.is_step_end)
            .collect();

        let mut boundaries: Vec<(u64, u64, Option<StimulusEvent>)> = Vec::new();
        for i in 0..step_events.len() {
            let start = (step_events[i].elapsed_ms as i64 + offset).max(0) as u64;
            // The LAST step phase extends to end-of-monitor-data, NOT to
            // the terminal event: the terminal is a rate boundary only,
            // and clamping the last phase's metric window to it would
            // drop trailing monitor samples (the host keeps sampling
            // through teardown). Preserves the pre-terminal window.
            let end = if i + 1 < step_events.len() {
                (step_events[i + 1].elapsed_ms as i64 + offset).max(0) as u64
            } else {
                last_monitor_ms.saturating_add(1)
            };
            let stimulus = if i == 0 {
                None
            } else {
                Some(step_events[i].clone())
            };
            boundaries.push((start, end, stimulus));
        }

        // Assign monitor samples to phases and compute metrics.
        let mut phases: Vec<Phase> = Vec::with_capacity(boundaries.len());
        for (idx, (start, end, stimulus)) in boundaries.into_iter().enumerate() {
            let phase_samples: Vec<&MonitorSample> = monitor_samples
                .iter()
                .filter(|s| s.elapsed_ms >= start && s.elapsed_ms < end && sample_looks_valid(s))
                .collect();

            let metrics = compute_metrics(&phase_samples);

            phases.push(Phase {
                index: idx,
                start_ms: start,
                end_ms: end,
                stimulus,
                metrics,
                changes: Vec::new(),
            });
        }

        // Per-phase iteration rate, STEP-LOCAL: each step's rate is its
        // own `StepStart[k] -> StepEnd[k]` delta — the step's OWN workers
        // measured start-to-end-of-hold, matching the snapshot path
        // (`build_phase_buckets_with_stimulus`). StepEnd events are
        // present in `events` (emitted independent of snapshot captures)
        // even on this monitor-only path, so the same step-local model
        // applies; without it, workers respawned fresh each step read
        // ~0 -> ~0 cross-step and every fresh-per-step phase but the last
        // silently reported no throughput. A step with NO StepEnd falls
        // back to the cross-step successor, or the terminal scenario-end
        // event for the last step — but that fallback yields a rate only
        // for legacy/synthetic data (a ScenarioEnd frame present without
        // per-step StepEnd frames). A sched-died step has neither a
        // StepEnd nor a terminal (the early return skips both emissions),
        // so its lookup and fallback both miss and it correctly reports no
        // rate. Duration is the guest-clock elapsed-ms delta between the
        // paired events — independent of the metric-sample window above
        // (whose last phase reaches end-of-monitor-data).
        #[allow(clippy::needless_range_loop)]
        for i in 0..phases.len() {
            let this = step_events[i];
            // Step-local boundary: this step's own StepEnd (same
            // step_index). Cross-step successor / terminal only when the
            // step has no StepEnd.
            let step_end: Option<&StimulusEvent> = this.step_index.and_then(|k| {
                events
                    .iter()
                    .find(|e| e.is_step_end && e.step_index == Some(k))
            });
            let next: Option<&StimulusEvent> = step_end.or_else(|| {
                if i + 1 < step_events.len() {
                    Some(step_events[i + 1])
                } else {
                    terminal
                }
            });
            // Shared formula with build_phase_buckets_with_stimulus via
            // StimulusEvent::rate_to (the sole iteration_rate site).
            if let Some(next_ev) = next
                && let Some(rate) = this.rate_to(next_ev)
            {
                phases[i].metrics.iteration_rate = Some(rate);
            }
        }

        // Detect changes at each phase boundary.
        for i in 1..phases.len() {
            let before = &phases[i - 1].metrics;
            let after_metrics = &phases[i].metrics;
            let mut changes = Vec::new();

            if before.sample_count > 0 && after_metrics.sample_count > 0 {
                if let (Some(bi), Some(ai)) = (before.avg_imbalance, after_metrics.avg_imbalance) {
                    changes.extend(detect_change(
                        bi,
                        ai,
                        IMBALANCE_THRESHOLD,
                        "imbalance",
                        true,
                    ));
                }
                if let (Some(bd), Some(ad)) = (before.avg_dsq_depth, after_metrics.avg_dsq_depth) {
                    changes.extend(detect_change(bd, ad, DSQ_THRESHOLD, "dsq_depth", true));
                }
                if let (Some(bf), Some(af)) = (before.fallback_rate, after_metrics.fallback_rate) {
                    changes.extend(detect_change(
                        bf,
                        af,
                        FALLBACK_RATE_THRESHOLD,
                        "fallback",
                        true,
                    ));
                }
                if let (Some(bk), Some(ak)) = (before.keep_last_rate, after_metrics.keep_last_rate)
                {
                    changes.extend(detect_change(
                        bk,
                        ak,
                        KEEP_LAST_RATE_THRESHOLD,
                        "keep_last",
                        true,
                    ));
                }
                if let (Some(bi), Some(ai)) = (before.iteration_rate, after_metrics.iteration_rate)
                    && bi > 0.0
                {
                    let rel_delta = (ai - bi) / bi;
                    if rel_delta.abs() > ITERATION_RATE_REL_THRESHOLD {
                        changes.push(PhaseChange {
                            direction: if rel_delta < 0.0 {
                                ChangeDirection::Degraded
                            } else {
                                ChangeDirection::Improved
                            },
                            metric: "throughput".to_string(),
                            before: bi,
                            after: ai,
                        });
                    }
                }
            }

            phases[i].changes = changes;
        }

        Self { phases }
    }

    /// Render the timeline as text, preceded by a system context header.
    ///
    /// An empty timeline renders as an empty string. Otherwise the output
    /// starts with the `--- timeline ---` line, followed by one header
    /// line holding the populated context fields (`kernel:`, `topology:`,
    /// `scheduler:`, `scenario:`, `duration:`) separated by two spaces,
    /// then the per-phase details.
    ///
    /// Tests without a real context pass `&TimelineContext::default()`:
    /// the header line is then omitted but the `--- timeline ---` prefix
    /// is kept.
    // There is deliberately no parameterless `format()` sibling: its
    // output would match a default context byte for byte, and the only
    // non-test caller (crate::test_support::eval) always has a real
    // context, so it would be dead code.
    pub fn format_with_context(&self, ctx: &TimelineContext) -> String {
        if self.phases.is_empty() {
            return String::new();
        }

        // Populated context fields, in their fixed display order.
        let header_parts: Vec<String> = [
            ctx.kernel.as_ref().map(|k| format!("kernel: {k}")),
            ctx.topology.as_ref().map(|t| format!("topology: {t}")),
            ctx.scheduler.as_ref().map(|s| format!("scheduler: {s}")),
            ctx.scenario.as_ref().map(|s| format!("scenario: {s}")),
            ctx.duration_s.map(|d| format!("duration: {d:.1}s")),
        ]
        .into_iter()
        .flatten()
        .collect();

        let mut out = String::from("--- timeline ---\n");
        if !header_parts.is_empty() {
            // Joining puts the separator between entries only, so there is
            // no trailing separator to trim.
            out.push_str(&header_parts.join("  "));
            out.push('\n');
        }

        self.format_phases(&mut out);
        out
    }

    /// Render phase details into the output buffer.
    fn format_phases(&self, out: &mut String) {
        for phase in &self.phases {
            let duration_ms = phase.end_ms.saturating_sub(phase.start_ms);

            if phase.index == 0 {
                // Phase 0 is the settle window before any stimulus.
                out.push_str(&format!(
                    "\nBASELINE (settle, {}ms, {} samples):\n",
                    duration_ms, phase.metrics.sample_count,
                ));
            } else {
                let label_start = phase
                    .stimulus
                    .as_ref()
                    .map(|s| {
                        let mut l = s.label.clone();
                        if let Some(op) = &s.op_kind {
                            l.push(' ');
                            l.push_str(op);
                        }
                        l
                    })
                    .unwrap_or_else(|| "?".to_string());

                out.push_str(&format!(
                    "\nPhase {}: {} ({}ms, {} samples):\n",
                    phase.index, label_start, duration_ms, phase.metrics.sample_count,
                ));
            }

            let m = &phase.metrics;
            if m.sample_count > 0 {
                out.push_str(&format!(
                    "  imbalance: avg={} max={} | dsq: avg={} max={}",
                    m.avg_imbalance
                        .map_or_else(|| "n/a".to_string(), |v| format!("{v:.1}")),
                    m.max_imbalance
                        .map_or_else(|| "n/a".to_string(), |v| format!("{v:.1}")),
                    m.avg_dsq_depth
                        .map_or_else(|| "n/a".to_string(), |v| format!("{v:.0}")),
                    m.max_dsq_depth,
                ));
                if let Some(fb) = m.fallback_rate {
                    out.push_str(&format!(" | fallback: {:.0}/s", fb));
                }
                if let Some(kl) = m.keep_last_rate {
                    out.push_str(&format!(" | keep_last: {:.0}/s", kl));
                }
                if let Some(ir) = m.iteration_rate {
                    out.push_str(&format!(" | throughput: {:.0} iter/s", ir));
                }
                out.push('\n');
                if m.stall_count > 0 {
                    out.push_str(&format!("  stalls: {}\n", m.stall_count));
                }
            } else {
                out.push_str("  [no samples]\n");
            }

            if let Some(ref stim) = phase.stimulus {
                let detail = stim.detail.as_deref().unwrap_or("");
                let op = stim.op_kind.as_deref().unwrap_or("?");
                out.push_str(&format!("  >>> {}: {op}", stim.label));
                if !detail.is_empty() {
                    out.push_str(&format!(" ({detail})"));
                }
                out.push('\n');
            }

            for change in &phase.changes {
                let delta = change.after - change.before;
                let sign = if delta > 0.0 { "+" } else { "" };
                out.push_str(&format!(
                    "  >>> {}: {} {sign}{:.1}\n",
                    change.direction, change.metric, delta,
                ));
            }
        }
    }

    /// Build a [`Timeline`] from pre-bucketed
    /// [`crate::assert::PhaseBucket`]s emitted by the metric pipeline.
    /// Preferred over [`Self::build`] when the caller already has
    /// `PhaseBucket`s in hand — avoids re-deriving phase boundaries
    /// from stimulus events + monitor samples by walking the buckets
    /// directly.
    ///
    /// One [`Phase`] is emitted per bucket, in `step_index` order;
    /// each phase's `index` is the bucket's position in that sorted
    /// order. `PhaseMetrics` fields are populated from the bucket's
    /// `metrics` map via a name-keyed mapping:
    ///
    /// | PhaseBucket metric key  | PhaseMetrics field      |
    /// |-------------------------|-------------------------|
    /// | `max_imbalance_ratio`   | `max_imbalance`         |
    /// | `avg_imbalance_ratio`   | `avg_imbalance`         |
    /// | `max_dsq_depth`         | `max_dsq_depth`         |
    /// | `avg_dsq_depth`         | `avg_dsq_depth`         |
    /// | `stuck_count`           | `stall_count`           |
    /// | `total_fallback`        | `fallback_rate` (rate)  |
    /// | `total_keep_last`       | `keep_last_rate` (rate) |
    /// | `iteration_rate`        | `iteration_rate`        |
    ///
    /// Rate fields (`fallback_rate`, `keep_last_rate`) are computed
    /// by dividing the bucket's reduced counter delta by the
    /// bucket's window duration in seconds
    /// (`(end_ms - start_ms) / 1000.0`). When the window has zero
    /// duration (degenerate bucket) the rate stays `None`.
    ///
    /// Every PhaseMetrics field has a PhaseBucket source — but
    /// `iteration_rate` only when build_phase_buckets_with_stimulus
    /// (not the plain build_phase_buckets) produced the bucket.
    /// `iteration_rate` requires stimulus events that the per-test
    /// scenario produces; the plain bucket-builder used by some
    /// tests doesn't have access to them. Defaults to `None` when
    /// PhaseBucket.metrics has no `iteration_rate` key.
    ///
    /// `changes` (boundary degradation detection) IS computed
    /// here by diffing adjacent `PhaseMetrics` fields — same
    /// detection logic [`Self::build`] uses, applied after the
    /// per-bucket conversion. avg_imbalance + avg_dsq_depth are
    /// supplied by PhaseBucket so the detection runs on the same
    /// fields as the legacy path.
    ///
    /// `_ctx` is accepted for signature compatibility and is not
    /// read by this function.
    pub fn from_phase_buckets(
        phase_buckets: &[crate::assert::PhaseBucket],
        stimulus_events: &[StimulusEvent],
        _ctx: &TimelineContext,
    ) -> Self {
        let mut sorted: Vec<&crate::assert::PhaseBucket> = phase_buckets.iter().collect();
        sorted.sort_by_key(|b| b.step_index);
        // Sort stimulus events by elapsed_ms so each bucket window
        // correlates deterministically with the first event that
        // falls inside it.
        // The terminal scenario-end event is excluded: it carries no
        // step ops/detail to render and its elapsed_ms lands past
        // every bucket window, so it would never correlate — filtering
        // it keeps the correlation set to real step starts only.
        // Per-step StepEnd events are likewise excluded so each bucket's
        // rendered op/detail label correlates to the step's defining
        // StepStart, not its end-of-hold marker (the bucket's iteration_rate
        // is already the step-local value computed upstream).
        let mut sorted_events: Vec<&StimulusEvent> = stimulus_events
            .iter()
            .filter(|e| !e.is_terminal && !e.is_step_end)
            .collect();
        sorted_events.sort_by_key(|e| e.elapsed_ms);
        let mut phases: Vec<Phase> = sorted
            .into_iter()
            .enumerate()
            .map(|(idx, b)| phase_from_bucket(idx, b, &sorted_events))
            .collect();
        // Boundary-change detection — same per-pair diffing logic
        // [`Self::build`] applies. Walks each adjacent (prev, curr)
        // pair and records significant deltas on the LATER phase's
        // `changes` vec so the operator sees "what changed when
        // entering this phase". Skips pairs where either side had
        // no samples — such a phase carries no real measurement, so
        // diffing against it would not describe a genuine change.
        for i in 1..phases.len() {
            // Shared borrows are enough here: both end before the
            // `changes` assignment at the bottom of the iteration,
            // so no per-iteration clone of `PhaseMetrics` is needed.
            let before = &phases[i - 1].metrics;
            let after = &phases[i].metrics;
            if before.sample_count == 0 || after.sample_count == 0 {
                continue;
            }
            let mut changes = Vec::new();
            if let (Some(bi), Some(ai)) = (before.avg_imbalance, after.avg_imbalance) {
                changes.extend(detect_change(
                    bi,
                    ai,
                    IMBALANCE_THRESHOLD,
                    "imbalance",
                    true,
                ));
            }
            if let (Some(bd), Some(ad)) = (before.avg_dsq_depth, after.avg_dsq_depth) {
                changes.extend(detect_change(bd, ad, DSQ_THRESHOLD, "dsq_depth", true));
            }
            if let (Some(bf), Some(af)) = (before.fallback_rate, after.fallback_rate) {
                changes.extend(detect_change(
                    bf,
                    af,
                    FALLBACK_RATE_THRESHOLD,
                    "fallback",
                    true,
                ));
            }
            if let (Some(bk), Some(ak)) = (before.keep_last_rate, after.keep_last_rate) {
                changes.extend(detect_change(
                    bk,
                    ak,
                    KEEP_LAST_RATE_THRESHOLD,
                    "keep_last",
                    true,
                ));
            }
            // Throughput is compared relatively (fraction of the
            // previous rate); a non-positive previous rate is skipped
            // so the division below is always well-defined.
            if let (Some(bi), Some(ai)) = (before.iteration_rate, after.iteration_rate)
                && bi > 0.0
            {
                let rel = (ai - bi) / bi;
                if rel.abs() > ITERATION_RATE_REL_THRESHOLD {
                    changes.push(PhaseChange {
                        // Lower throughput is the degradation.
                        direction: if rel < 0.0 {
                            ChangeDirection::Degraded
                        } else {
                            ChangeDirection::Improved
                        },
                        metric: "throughput".to_string(),
                        before: bi,
                        after: ai,
                    });
                }
            }
            phases[i].changes = changes;
        }
        Self { phases }
    }

    /// Test helper — every change whose direction is `Degraded`,
    /// paired with the phase that recorded it. Results follow phase
    /// order, then the order of each phase's `changes`.
    ///
    /// Kept after the gauntlet analyzer was removed: the scenarios
    /// pipeline consumes `Timeline` through `format_with_context`
    /// and never reads degradations directly.
    #[cfg(test)]
    pub fn degradations(&self) -> Vec<(&Phase, &PhaseChange)> {
        self.phases
            .iter()
            .flat_map(|phase| {
                phase
                    .changes
                    .iter()
                    .filter(|change| change.direction == ChangeDirection::Degraded)
                    .map(move |change| (phase, change))
            })
            .collect()
    }
}

// ---------------------------------------------------------------------------
// PhaseBucket → Phase conversion
// ---------------------------------------------------------------------------

/// Build a [`Phase`] from a [`crate::assert::PhaseBucket`].
///
/// `idx` becomes the phase index (the caller passes the bucket's
/// position in its `step_index`-sorted list). The metric map is
/// projected onto the named `PhaseMetrics` fields per the table in
/// [`Timeline::from_phase_buckets`]; `start_ms` / `end_ms` are copied
/// through unchanged.
///
/// Stimulus selection:
/// - a bucket with `step_index == 0` (BASELINE) gets `stimulus = None`;
/// - any other bucket gets a clone of the first entry of
///   `sorted_events` whose `elapsed_ms` lies in the half-open window
///   `[start_ms, end_ms)` — or equals `start_ms` exactly when the
///   window has zero length;
/// - when no event matches, a placeholder [`StimulusEvent`] is
///   synthesized from the bucket label with `op_kind` / `detail` set
///   to `None`, so the renderer still prints a recognizable header.
fn phase_from_bucket(
    idx: usize,
    b: &crate::assert::PhaseBucket,
    sorted_events: &[&StimulusEvent],
) -> Phase {
    // Window length in seconds. An empty or inverted window yields
    // 0.0, which switches the counter-based rates off below.
    let window_s = b.end_ms.saturating_sub(b.start_ms) as f64 / 1000.0;

    // Plain lookup of a metric value by key.
    let metric = |key: &str| b.metrics.get(key).copied();
    // Counter total divided by the window length. Degenerate windows
    // report `None` rather than a spurious infinity.
    let per_second = |key: &str| -> Option<f64> {
        if window_s > 0.0 {
            metric(key).map(|total| total / window_s)
        } else {
            None
        }
    };

    let metrics = PhaseMetrics {
        sample_count: b.sample_count,
        avg_imbalance: metric("avg_imbalance_ratio"),
        max_imbalance: metric("max_imbalance_ratio"),
        avg_dsq_depth: metric("avg_dsq_depth"),
        max_dsq_depth: metric("max_dsq_depth").map_or(0, |v| v.round() as u32),
        stall_count: metric("stuck_count").map_or(0, |v| v.round() as usize),
        fallback_rate: per_second("total_fallback"),
        keep_last_rate: per_second("total_keep_last"),
        // Already a per-phase rate in the bucket map, so it is taken
        // verbatim and must NOT be divided by the window length.
        iteration_rate: metric("iteration_rate"),
    };

    // Does a timestamp belong to this bucket's window?
    let in_window = |at_ms: u64| {
        if b.start_ms == b.end_ms {
            at_ms == b.start_ms
        } else {
            (b.start_ms..b.end_ms).contains(&at_ms)
        }
    };

    // BASELINE carries no stimulus. Every later bucket carries one:
    // the real event when one falls inside the window (this keeps
    // op_kind + detail available to the failure-message render), and
    // otherwise a label-only placeholder, which renders as a
    // "<label>: ?" line.
    let stimulus = (b.step_index != 0).then(|| {
        sorted_events
            .iter()
            .copied()
            .find(|ev| in_window(ev.elapsed_ms))
            .cloned()
            .unwrap_or_else(|| StimulusEvent {
                elapsed_ms: b.start_ms,
                label: b.label.clone(),
                op_kind: None,
                detail: None,
                total_iterations: None,
                // No correlated event, hence no authoritative step
                // ordinal to carry.
                step_index: None,
                is_terminal: false,
                is_step_end: false,
            })
    });

    Phase {
        index: idx,
        start_ms: b.start_ms,
        end_ms: b.end_ms,
        stimulus,
        metrics,
        changes: Vec::new(),
    }
}

// ---------------------------------------------------------------------------
// Metric computation
// ---------------------------------------------------------------------------

/// Reduce the monitor samples of one phase window to a [`PhaseMetrics`].
///
/// Samples with no CPU entries, or that fail `sample_looks_valid`,
/// are discarded first; `sample_count` reports how many survived.
/// When none survive the result is the default metrics with a
/// `sample_count` of 0.
///
/// From the surviving samples:
/// - `avg_imbalance` / `max_imbalance`: mean and maximum of each
///   sample's `imbalance_ratio()`;
/// - `avg_dsq_depth`: mean over samples of the per-sample mean
///   `local_dsq_depth`; `max_dsq_depth`: the largest single-CPU depth;
/// - `stall_count`: one per (consecutive sample pair, CPU) whose
///   non-zero `rq_clock` did not change, unless `nr_running` is 0 in
///   both samples;
/// - `fallback_rate` / `keep_last_rate`: counter delta between the
///   first and last sample that carry event counters, per second;
///   `None` unless the first is strictly earlier than the last;
/// - `iteration_rate`: always `None` on this path.
fn compute_metrics(samples: &[&MonitorSample]) -> PhaseMetrics {
    /// True when at least one CPU in the sample carries event counters.
    fn carries_events(s: &MonitorSample) -> bool {
        s.cpus.iter().any(|c| c.event_counters.is_some())
    }

    if samples.is_empty() {
        return PhaseMetrics::default();
    }

    // Drop implausible samples (e.g. garbage DSQ depths read from
    // uninitialized guest memory) before any aggregation.
    let usable: Vec<&MonitorSample> = samples
        .iter()
        .copied()
        .filter(|s| !s.cpus.is_empty() && sample_looks_valid(s))
        .collect();
    if usable.is_empty() {
        return PhaseMetrics {
            sample_count: 0,
            ..PhaseMetrics::default()
        };
    }

    // Single pass for the imbalance and DSQ aggregates.
    let mut imbalance_sum = 0.0f64;
    let mut imbalance_peak = 0.0f64;
    let mut dsq_mean_sum = 0.0f64;
    let mut dsq_peak = 0u32;
    for sample in &usable {
        let ratio = sample.imbalance_ratio();
        imbalance_sum += ratio;
        if ratio > imbalance_peak {
            imbalance_peak = ratio;
        }

        let mut depth_sum = 0.0f64;
        for cpu in &sample.cpus {
            dsq_peak = dsq_peak.max(cpu.local_dsq_depth);
            depth_sum += cpu.local_dsq_depth as f64;
        }
        // `cpus` is non-empty for every usable sample (see filter).
        dsq_mean_sum += depth_sum / sample.cpus.len() as f64;
    }

    // Stall detection between consecutive usable samples. `zip` stops
    // at the shorter CPU list, so only CPUs present in both samples
    // are compared.
    let stall_count: usize = usable
        .windows(2)
        .map(|pair| {
            let (prev, curr) = (pair[0], pair[1]);
            prev.cpus
                .iter()
                .zip(curr.cpus.iter())
                .filter(|(before, after)| {
                    let idle = after.nr_running == 0 && before.nr_running == 0;
                    after.rq_clock != 0 && after.rq_clock == before.rq_clock && !idle
                })
                .count()
        })
        .sum();

    // Event counter rates: delta between the first and last usable
    // samples that carry event counters, divided by the time between
    // them.
    let first_ev = usable.iter().copied().find(|s| carries_events(s));
    let last_ev = usable.iter().copied().rfind(|s| carries_events(s));
    let (fallback_rate, keep_last_rate) = match first_ev.zip(last_ev) {
        Some((first, last)) if first.elapsed_ms < last.elapsed_ms => {
            // The `<` guard already rules out underflow; the
            // saturating subtraction is defense-in-depth should the
            // guard ever be loosened.
            let span_s = last.elapsed_ms.saturating_sub(first.elapsed_ms) as f64 / 1000.0;
            // Counters can reset mid-run (scheduler restart); the
            // shared helper keeps the delta from going negative, same
            // semantics as MonitorSummary::compute_event_deltas.
            let fallback = crate::monitor::counter_delta(
                last.sum_event_field(|e| e.select_cpu_fallback).unwrap_or(0),
                first
                    .sum_event_field(|e| e.select_cpu_fallback)
                    .unwrap_or(0),
            );
            let keep_last = crate::monitor::counter_delta(
                last.sum_event_field(|e| e.dispatch_keep_last).unwrap_or(0),
                first.sum_event_field(|e| e.dispatch_keep_last).unwrap_or(0),
            );
            (
                Some(fallback as f64 / span_s),
                Some(keep_last as f64 / span_s),
            )
        }
        _ => (None, None),
    };

    // `usable` is non-empty here (early return above), so the
    // averages are always real measurements and never 0.0/0.0.
    let valid_count = usable.len();
    let n = valid_count as f64;
    PhaseMetrics {
        sample_count: valid_count,
        avg_imbalance: Some(imbalance_sum / n),
        max_imbalance: Some(imbalance_peak),
        avg_dsq_depth: Some(dsq_mean_sum / n),
        max_dsq_depth: dsq_peak,
        stall_count,
        fallback_rate,
        keep_last_rate,
        iteration_rate: None,
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::monitor::{CpuSnapshot, MonitorSample};

    /// Fixture: a [`MonitorSample`] taken at `elapsed_ms` with one CPU
    /// per `(nr_running, local_dsq_depth, rq_clock)` tuple. Every
    /// other snapshot field is zero / `None`.
    fn sample(elapsed_ms: u64, cpus: Vec<(u32, u32, u64)>) -> MonitorSample {
        let to_snapshot = |(nr_running, local_dsq_depth, rq_clock): (u32, u32, u64)| CpuSnapshot {
            nr_running,
            scx_nr_running: 0,
            local_dsq_depth,
            rq_clock,
            scx_flags: 0,
            event_counters: None,
            schedstat: None,
            vcpu_cpu_time_ns: None,
            vcpu_perf: None,
            sched_domains: None,
        };
        MonitorSample {
            prog_stats: None,
            elapsed_ms,
            cpus: cpus.into_iter().map(to_snapshot).collect(),
        }
    }

    /// Fixture: a bare [`StimulusEvent`] at `elapsed_ms` carrying only
    /// `label`; all optional fields are `None` and both marker flags
    /// are `false`.
    fn stimulus(elapsed_ms: u64, label: &str) -> StimulusEvent {
        let label = label.to_owned();
        StimulusEvent {
            elapsed_ms,
            label,
            op_kind: None,
            detail: None,
            total_iterations: None,
            step_index: None,
            is_terminal: false,
            is_step_end: false,
        }
    }

    /// No events and no samples: the timeline has no phases.
    #[test]
    fn empty_inputs_empty_timeline() {
        let timeline = Timeline::build(&[], &[]);
        assert_eq!(timeline.phases.len(), 0);
    }

    /// Samples without any stimulus event: the timeline has no phases.
    #[test]
    fn no_stimulus_empty_timeline() {
        let only_samples = vec![sample(1000, vec![(2, 1, 100)])];
        let timeline = Timeline::build(&[], &only_samples);
        assert_eq!(timeline.phases.len(), 0);
    }

    /// A stimulus event without any monitor sample: the timeline has
    /// no phases.
    #[test]
    fn no_monitor_empty_timeline() {
        let only_events = vec![stimulus(0, "ScenarioStart")];
        let timeline = Timeline::build(&only_events, &[]);
        assert_eq!(timeline.phases.len(), 0);
    }

    /// A single event yields a single phase containing both samples.
    #[test]
    fn single_event_single_phase() {
        let start_only = vec![stimulus(0, "ScenarioStart")];
        let two_samples = vec![
            sample(600, vec![(2, 1, 100), (2, 1, 200)]),
            sample(700, vec![(2, 1, 300), (2, 1, 400)]),
        ];
        let timeline = Timeline::build(&start_only, &two_samples);
        assert_eq!(timeline.phases.len(), 1);
        // Both samples — including the one AT last_monitor_ms (700) —
        // must land in the single phase's [start, last_monitor_ms+1)
        // window. A `> 0` check would still pass if the
        // last-sample-inclusion off-by-one (end = last_monitor_ms+1)
        // regressed to +0 and dropped the 700 sample, so the exact
        // count is pinned.
        let only_phase = &timeline.phases[0];
        assert_eq!(only_phase.metrics.sample_count, 2);
    }

    /// Two events split the samples into two phases at a pinned
    /// boundary.
    #[test]
    fn two_events_two_phases() {
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(3000, "StepStart[0]")];
        let samples: Vec<MonitorSample> = (5..65)
            .map(|i| sample(i * 100, vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)]))
            .collect();
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(timeline.phases.len(), 2);
        // Pin WHERE the boundary fell, not just non-emptiness: 60
        // samples at i*100 (i in 5..65 → 500..6400); the >500 warmup
        // drops the 500 sample (i=5), leaving 59. The StepStart[0]@3000
        // boundary (offset-adjusted) splits them 30/29. A `> 0` check
        // would still pass if the offset/boundary math moved the split
        // point while leaving samples on both sides.
        let per_phase: Vec<usize> = timeline
            .phases
            .iter()
            .map(|p| p.metrics.sample_count)
            .collect();
        assert_eq!(per_phase, [30, 29]);
        assert_eq!(
            per_phase.iter().sum::<usize>(),
            59,
            "59 = 60 samples minus the 500ms sample dropped by the >500 warmup",
        );
    }

    /// Going from an imbalanced phase to a balanced one must record at
    /// least one `Improved` change.
    #[test]
    fn improvement_detected() {
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(1000, "StepStart[0]")];
        // Phase 0: imbalanced (nr_running 1 vs 5).
        let imbalanced = (5..15).map(|i| {
            sample(
                i * 100,
                vec![(1, 1, i * 1000), (5, 1, i * 1000 + 100)],
            )
        });
        // Phase 1: balanced (nr_running 2 vs 2).
        let balanced = (15..25).map(|i| {
            sample(
                i * 100,
                vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)],
            )
        });
        let samples: Vec<MonitorSample> = imbalanced.chain(balanced).collect();

        let timeline = Timeline::build(&events, &samples);
        let saw_improvement = timeline
            .phases
            .iter()
            .flat_map(|p| p.changes.iter())
            .any(|c| c.direction == ChangeDirection::Improved);
        assert!(saw_improvement);
    }

    /// The rendered timeline mentions the baseline, the first step
    /// phase and the imbalance metric.
    #[test]
    fn format_non_empty() {
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(1000, "StepStart[0]")];
        let samples: Vec<MonitorSample> = (5..25)
            .map(|i| sample(i * 100, vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)]))
            .collect();
        let rendered =
            Timeline::build(&events, &samples).format_with_context(&TimelineContext::default());
        for needle in ["BASELINE", "Phase 1", "imbalance"] {
            assert!(rendered.contains(needle));
        }
    }

    /// Events supplied out of order still produce phases in time
    /// order.
    #[test]
    fn unsorted_events_sorted() {
        let out_of_order = vec![stimulus(3000, "StepStart[0]"), stimulus(0, "ScenarioStart")];
        let samples: Vec<MonitorSample> = (5..35)
            .map(|i| sample(i * 100, vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)]))
            .collect();
        let timeline = Timeline::build(&out_of_order, &samples);
        assert_eq!(timeline.phases.len(), 2);
        // The first phase must come from ScenarioStart (the earliest
        // event), which carries no stimulus.
        let first_phase = &timeline.phases[0];
        assert!(first_phase.stimulus.is_none());
    }

    /// An unchanged, non-zero rq_clock on a busy CPU counts as one
    /// stall.
    #[test]
    fn stall_detected_in_phase() {
        let events = vec![stimulus(0, "ScenarioStart")];
        // cpu0 keeps rq_clock 5000 across both samples (stalled);
        // cpu1 advances from 6000 to 7000.
        let samples = vec![
            sample(600, vec![(1, 0, 5000), (1, 0, 6000)]),
            sample(700, vec![(1, 0, 5000), (1, 0, 7000)]),
        ];
        let timeline = Timeline::build(&events, &samples);
        let stalls = timeline.phases[0].metrics.stall_count;
        assert_eq!(stalls, 1);
    }

    /// Empty input yields "no measurement", not a false zero.
    #[test]
    fn compute_metrics_empty() {
        let metrics = compute_metrics(&[]);
        assert_eq!(metrics.sample_count, 0);
        // No samples -> `None`, never a fabricated 0.0 (the sentinel
        // fix).
        assert!(metrics.avg_imbalance.is_none());
        assert!(metrics.max_imbalance.is_none());
        assert!(metrics.avg_dsq_depth.is_none());
        assert_eq!(metrics.max_dsq_depth, 0);
    }

    /// An event's `op_kind` and `detail` both appear in the rendered
    /// timeline.
    #[test]
    fn stimulus_event_with_detail() {
        // Start from the bare fixture and fill in only the two fields
        // under test.
        let detailed = StimulusEvent {
            op_kind: Some("SetCpuset".to_string()),
            detail: Some("4 cpus".to_string()),
            ..stimulus(100, "StepStart[0]")
        };
        let events = vec![stimulus(0, "ScenarioStart"), detailed];
        let samples: Vec<MonitorSample> = (5..25)
            .map(|i| sample(i * 100, vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)]))
            .collect();
        let rendered =
            Timeline::build(&events, &samples).format_with_context(&TimelineContext::default());
        assert!(rendered.contains("SetCpuset"));
        assert!(rendered.contains("4 cpus"));
    }

    /// Ten events produce ten phases.
    #[test]
    fn many_phases() {
        let ten_events: Vec<StimulusEvent> = (0..10)
            .map(|step| stimulus(step * 500, &format!("Step[{step}]")))
            .collect();
        let samples: Vec<MonitorSample> = (5..55)
            .map(|tick| sample(tick * 100, vec![(2, 1, tick * 1000)]))
            .collect();
        let timeline = Timeline::build(&ten_events, &samples);
        assert_eq!(timeline.phases.len(), 10);
    }

    /// Pins the aggregates `compute_metrics` derives from two
    /// hand-built samples, including the DSQ average that the fixture
    /// comments promise.
    #[test]
    fn phase_metrics_accuracy() {
        let s1 = sample(600, vec![(1, 3, 100), (4, 5, 200)]); // ratio=4, avg_dsq=4
        let s2 = sample(700, vec![(2, 1, 300), (2, 7, 400)]); // ratio=1, avg_dsq=4
        let refs: Vec<&MonitorSample> = vec![&s1, &s2];
        let m = compute_metrics(&refs);
        assert_eq!(m.sample_count, 2);
        assert!((m.avg_imbalance.unwrap() - 2.5).abs() < 0.01); // (4+1)/2
        assert!((m.max_imbalance.unwrap() - 4.0).abs() < 0.01);
        // Per-sample DSQ means are (3+5)/2 = 4 and (1+7)/2 = 4, so the
        // phase average is 4.
        assert!((m.avg_dsq_depth.unwrap() - 4.0).abs() < 0.01);
        assert_eq!(m.max_dsq_depth, 7);
    }

    // -- ChangeDirection Display tests --

    /// `Display` renders the operator-facing direction labels.
    #[test]
    fn change_direction_display() {
        assert_eq!(ChangeDirection::Improved.to_string(), "IMPROVEMENT");
        assert_eq!(ChangeDirection::Degraded.to_string(), "DEGRADATION");
    }

    // -- compute_metrics with event counters --

    /// Event-counter deltas over a 1 s span turn into per-second rates.
    #[test]
    fn compute_metrics_with_event_counters() {
        use crate::monitor::ScxEventCounters;

        // One-CPU sample; only the timestamp, rq_clock and the two
        // counters under test vary between calls.
        let counted = |elapsed_ms: u64, rq_clock: u64, fallback, keep_last| MonitorSample {
            prog_stats: None,
            elapsed_ms,
            cpus: vec![CpuSnapshot {
                nr_running: 2,
                scx_nr_running: 0,
                local_dsq_depth: 1,
                rq_clock,
                scx_flags: 0,
                event_counters: Some(ScxEventCounters {
                    select_cpu_fallback: fallback,
                    dispatch_keep_last: keep_last,
                    ..Default::default()
                }),
                schedstat: None,
                vcpu_cpu_time_ns: None,
                vcpu_perf: None,
                sched_domains: None,
            }],
        };
        let first = counted(600, 100, 10, 5);
        let last = counted(1600, 200, 110, 55);

        let metrics = compute_metrics(&[&first, &last]);
        // fallback delta: 110 - 10 = 100 over 1.0s = 100.0/s
        let fallback = metrics.fallback_rate.unwrap();
        assert!((fallback - 100.0).abs() < 0.01);
        // keep_last delta: 55 - 5 = 50 over 1.0s = 50.0/s
        let keep_last = metrics.keep_last_rate.unwrap();
        assert!((keep_last - 50.0).abs() < 0.01);
    }

    /// Without event counters on any sample, both rates stay `None`.
    #[test]
    fn compute_metrics_no_event_counters() {
        let earlier = sample(600, vec![(2, 1, 100)]);
        let later = sample(700, vec![(2, 1, 200)]);
        let metrics = compute_metrics(&[&earlier, &later]);
        assert_eq!(metrics.fallback_rate, None);
        assert_eq!(metrics.keep_last_rate, None);
    }

    /// A counter reset between samples must not surface as a negative
    /// rate.
    #[test]
    fn compute_metrics_counter_reset_clamps_rates_to_non_negative() {
        // A scheduler restart between samples resets event counters
        // to smaller (or zero) values. A raw `last - first` would then
        // be negative and flow into `fallback_rate = delta / duration`
        // as a negative rate. The shared counter_delta helper clamps
        // the delta so that cannot happen.
        use crate::monitor::ScxEventCounters;

        // One-CPU sample; only the timestamp, rq_clock and the two
        // counters under test vary between calls.
        let counted = |elapsed_ms: u64, rq_clock: u64, fallback, keep_last| MonitorSample {
            prog_stats: None,
            elapsed_ms,
            cpus: vec![CpuSnapshot {
                nr_running: 2,
                scx_nr_running: 0,
                local_dsq_depth: 1,
                rq_clock,
                scx_flags: 0,
                event_counters: Some(ScxEventCounters {
                    select_cpu_fallback: fallback,
                    dispatch_keep_last: keep_last,
                    ..Default::default()
                }),
                schedstat: None,
                vcpu_cpu_time_ns: None,
                vcpu_perf: None,
                sched_domains: None,
            }],
        };
        let before_reset = counted(0, 100, 1000, 500);
        let after_reset = counted(1000, 200, 5, 2);

        let metrics = compute_metrics(&[&before_reset, &after_reset]);
        let rates = [
            ("fallback_rate", metrics.fallback_rate),
            ("keep_last_rate", metrics.keep_last_rate),
        ];
        for (name, rate) in rates {
            let rate = rate.expect("reset still produces Some rate");
            assert!(
                rate >= 0.0,
                "reset must not produce negative {name}, got {rate}"
            );
        }
    }

    // -- format with stalls --

    /// A detected stall shows up as a `stalls:` line in the rendered
    /// timeline.
    #[test]
    fn format_with_stalls_shown() {
        let events = vec![stimulus(0, "ScenarioStart")];
        // cpu0 keeps rq_clock 5000 across both samples (stalled).
        let samples = vec![
            sample(600, vec![(1, 0, 5000), (1, 0, 6000)]),
            sample(700, vec![(1, 0, 5000), (1, 0, 7000)]),
        ];
        let rendered =
            Timeline::build(&events, &samples).format_with_context(&TimelineContext::default());
        assert!(rendered.contains("stalls: 1"));
    }

    // -- format with no samples in a phase --

    /// A phase that receives no samples renders the `[no samples]`
    /// marker.
    #[test]
    fn format_phase_no_samples() {
        // The last boundary sits far beyond the final monitor sample,
        // which leaves the last phase without any samples.
        let events = vec![
            stimulus(0, "ScenarioStart"),
            stimulus(100, "StepStart[0]"),
            stimulus(50000, "StepStart[1]"),
        ];
        // Every sample falls in the middle phase window.
        let samples: Vec<MonitorSample> = (5..15)
            .map(|tick| sample(tick * 100, vec![(2, 1, tick * 1000)]))
            .collect();
        let rendered =
            Timeline::build(&events, &samples).format_with_context(&TimelineContext::default());
        // The last phase (50000+offset to end) should have no samples.
        assert!(rendered.contains("[no samples]"));
    }

    // -- timeline with fallback rate change detection --

    /// A fallback counter that is flat in the first phase and climbs steeply
    /// in the second must surface as a "fallback" degradation.
    #[test]
    fn fallback_rate_degradation_detected() {
        use crate::monitor::ScxEventCounters;

        // Ticks 5..15 (500..=1400 ms): `select_cpu_fallback` stays at 0, so
        // the intended first phase has a zero fallback rate.
        // Ticks 15..25 (1500..=2400 ms): the counter grows by 50 per 100 ms
        // sample, i.e. 0 -> 450 across 0.9 s = 500/s, which the test expects
        // to be well above the fallback threshold.
        let samples: Vec<MonitorSample> = (5..25)
            .map(|i| MonitorSample {
                prog_stats: None,
                elapsed_ms: i * 100,
                cpus: vec![CpuSnapshot {
                    nr_running: 2,
                    local_dsq_depth: 1,
                    rq_clock: i * 1000,
                    scx_nr_running: 0,
                    scx_flags: 0,
                    event_counters: Some(ScxEventCounters {
                        select_cpu_fallback: if i < 15 { 0 } else { (i as i64 - 15) * 50 },
                        dispatch_keep_last: 0,
                        ..Default::default()
                    }),
                    schedstat: None,
                    vcpu_cpu_time_ns: None,
                    vcpu_perf: None,
                    sched_domains: None,
                }],
            })
            .collect();
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(1000, "StepStart[0]")];
        let timeline = Timeline::build(&events, &samples);
        let fallback_flagged = timeline
            .degradations()
            .iter()
            .any(|(_, c)| c.metric == "fallback");
        assert!(fallback_flagged);
    }

    // -- format_with_context tests --

    /// A fully populated context must render every field after the timeline
    /// header, followed by the phase output.
    #[test]
    fn format_with_context_includes_header() {
        let ctx = TimelineContext {
            kernel: Some(String::from("6.14.0-rc3+")),
            topology: Some(String::from("2n4l4c2t (16 cpus)")),
            scheduler: Some(String::from("scx_mitosis")),
            scenario: Some(String::from("proportional")),
            duration_s: Some(20.5),
        };
        let samples = vec![
            sample(600, vec![(2, 1, 100), (2, 1, 200)]),
            sample(700, vec![(2, 1, 300), (2, 1, 400)]),
        ];
        let events = vec![stimulus(0, "ScenarioStart")];
        let timeline = Timeline::build(&events, &samples);
        let rendered = timeline.format_with_context(&ctx);
        // Header marker first, then one assertion per context field.
        assert!(rendered.contains("--- timeline ---"));
        assert!(rendered.contains("kernel: 6.14.0-rc3+"));
        assert!(rendered.contains("topology: 2n4l4c2t (16 cpus)"));
        assert!(rendered.contains("scheduler: scx_mitosis"));
        assert!(rendered.contains("scenario: proportional"));
        assert!(rendered.contains("duration: 20.5s"));
        // The single phase is still rendered below the context.
        assert!(rendered.contains("BASELINE"));
    }

    /// Only the context fields that are `Some` may appear in the output;
    /// `None` fields must leave no label behind.
    #[test]
    fn format_with_context_partial_fields() {
        let ctx = TimelineContext {
            topology: Some(String::from("1n1l1c1t (1 cpus)")),
            scenario: Some(String::from("basic")),
            kernel: None,
            scheduler: None,
            duration_s: None,
        };
        let samples = vec![sample(600, vec![(2, 1, 100)])];
        let events = vec![stimulus(0, "ScenarioStart")];
        let timeline = Timeline::build(&events, &samples);
        let rendered = timeline.format_with_context(&ctx);
        // Populated fields are printed.
        assert!(rendered.contains("topology: 1n1l1c1t"));
        assert!(rendered.contains("scenario: basic"));
        // Unset fields contribute no label at all.
        assert!(!rendered.contains("kernel:"));
        assert!(!rendered.contains("scheduler:"));
        assert!(!rendered.contains("duration:"));
    }

    /// A timeline with no phases formats to the empty string even when the
    /// context carries data.
    #[test]
    fn format_with_context_empty_timeline() {
        let ctx = TimelineContext {
            kernel: Some(String::from("6.14.0")),
            ..Default::default()
        };
        let timeline = Timeline { phases: Vec::new() };
        let rendered = timeline.format_with_context(&ctx);
        assert!(rendered.is_empty());
    }

    /// With an all-`None` context the output keeps the timeline header and
    /// the phases but emits no context line between them.
    #[test]
    fn format_with_context_empty_context() {
        let events = vec![stimulus(0, "ScenarioStart")];
        let samples = vec![sample(600, vec![(2, 1, 100)])];
        let t = Timeline::build(&events, &samples);
        let ctx = TimelineContext::default();
        let formatted = t.format_with_context(&ctx);
        // Should have the timeline header and phases but no context line.
        assert!(formatted.contains("--- timeline ---"));
        assert!(formatted.contains("BASELINE"));
        // Locate the header by content instead of slicing at a fixed byte
        // offset: a fixed offset silently assumes the header starts at byte 0
        // and would inspect the wrong text (or panic on a short string)
        // otherwise.
        let (_, after_header) = formatted
            .split_once("--- timeline ---\n")
            .expect("output must contain the timeline header line");
        // What follows the header line should be "\nBASELINE" (no context line).
        assert!(after_header.starts_with('\n'));
    }

    /// Samples whose DSQ depth exceeds DSQ_PLAUSIBILITY_CEILING must be
    /// excluded from phase metrics. Regression guard: garbage values such as
    /// 1.5B used to flow into the timeline output.
    #[test]
    fn garbage_dsq_samples_filtered_from_metrics() {
        let garbage_dsq = 1_550_435_906u32;
        // Implausible sample (DSQ depth above the ceiling).
        let corrupt = MonitorSample {
            prog_stats: None,
            elapsed_ms: 600,
            cpus: vec![CpuSnapshot {
                nr_running: 1,
                local_dsq_depth: garbage_dsq,
                rq_clock: 1000,
                ..Default::default()
            }],
        };
        // Plausible sample with DSQ depth 3.
        let valid = sample(700, vec![(2, 3, 2000)]);
        let samples = vec![corrupt, valid];
        let events = vec![stimulus(0, "ScenarioStart")];
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(timeline.phases.len(), 1);
        // Only the valid sample may be counted, so the max depth is its 3.
        let metrics = &timeline.phases[0].metrics;
        assert_eq!(metrics.sample_count, 1);
        assert_eq!(metrics.max_dsq_depth, 3);
    }

    /// When the only sample is rejected as garbage, the phase must end up
    /// with a sample count of zero.
    #[test]
    fn all_garbage_samples_yield_no_metrics() {
        // A DSQ depth of 50_000 is expected to be rejected as implausible
        // (presumably above the plausibility ceiling — the constant is not
        // visible from this test).
        let implausible_cpu = CpuSnapshot {
            nr_running: 1,
            local_dsq_depth: 50_000,
            rq_clock: 1000,
            ..Default::default()
        };
        let samples = vec![MonitorSample {
            prog_stats: None,
            elapsed_ms: 600,
            cpus: vec![implausible_cpu],
        }];
        let events = vec![stimulus(0, "ScenarioStart")];
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(timeline.phases[0].metrics.sample_count, 0);
    }

    // ---------------------------------------------------------------
    // Negative test: timeline detects degradation at phase transition
    // ---------------------------------------------------------------

    /// A jump in per-CPU `nr_running` imbalance across a phase boundary must
    /// be reported as an "imbalance" degradation on phase 1, both in the
    /// structured changes and in the formatted output.
    #[test]
    fn neg_timeline_detects_imbalance_degradation() {
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(2000, "StepStart[0]")];
        let mut samples = Vec::new();
        // Before the step: both CPUs report nr_running == 2 (balanced).
        for i in 6..25 {
            samples.push(sample(
                i * 100,
                vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)],
            ));
        }
        // After the step: nr_running of 1 vs 10 (heavily imbalanced).
        for i in 26..45 {
            samples.push(sample(
                i * 100,
                vec![(1, 1, i * 1000), (10, 1, i * 1000 + 100)],
            ));
        }
        let t = Timeline::build(&events, &samples);
        assert_eq!(t.phases.len(), 2, "must have 2 phases");

        // Query degradations once; the same list backs both the early
        // non-empty check and the detailed assertions further down.
        let degs = t.degradations();
        assert!(!degs.is_empty(), "imbalance jump must be detected");

        // Phase 0 (baseline) must have samples and reasonable metrics.
        assert!(
            t.phases[0].metrics.sample_count > 0,
            "baseline must have samples"
        );
        let baseline_imbalance = t.phases[0]
            .metrics
            .avg_imbalance
            .expect("baseline must report avg_imbalance");
        assert!(
            (baseline_imbalance - 1.0).abs() < 0.5,
            "baseline imbalance should be ~1.0, got {baseline_imbalance}",
        );

        // Phase 1 must have the stimulus label and degradation.
        assert!(
            t.phases[1].metrics.sample_count > 0,
            "phase 1 must have samples"
        );
        assert!(
            t.phases[1]
                .stimulus
                .as_ref()
                .is_some_and(|s| s.label == "StepStart[0]"),
            "phase 1 stimulus must be StepStart[0]",
        );

        // Indexing is safe: non-emptiness was asserted above.
        let (phase, change) = &degs[0];
        assert_eq!(phase.index, 1);
        assert_eq!(change.metric, "imbalance");
        assert_eq!(change.direction, ChangeDirection::Degraded);
        let delta = change.after - change.before;
        assert!(delta > 0.0, "delta must be positive for degradation");
        assert!(
            delta > IMBALANCE_THRESHOLD,
            "delta {:.1} must exceed threshold {:.1}",
            delta,
            IMBALANCE_THRESHOLD
        );
        assert!(
            change.before < 2.0,
            "before should be low: {:.1}",
            change.before
        );
        assert!(
            change.after > 5.0,
            "after should be high: {:.1}",
            change.after
        );

        // The formatted output must carry the phase labels, the degradation
        // marker and the metric name (substring checks only).
        let formatted = t.format_with_context(&TimelineContext::default());
        assert!(
            formatted.contains("BASELINE"),
            "format must include BASELINE phase"
        );
        assert!(formatted.contains("Phase 1"), "format must include Phase 1");
        assert!(
            formatted.contains("DEGRADATION"),
            "format must include DEGRADATION label"
        );
        assert!(
            formatted.contains("imbalance"),
            "format must name the metric"
        );
    }

    /// A jump in local DSQ depth across a phase boundary must be reported as
    /// a "dsq_depth" degradation on phase 1 and named in the formatted output.
    #[test]
    fn neg_timeline_detects_dsq_depth_degradation() {
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(2000, "StepStart[0]")];
        let mut samples = Vec::new();
        // Before the step: DSQ depth 1 on both CPUs.
        for i in 6..25 {
            samples.push(sample(
                i * 100,
                vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)],
            ));
        }
        // After the step: DSQ depth 20 on both CPUs.
        for i in 26..45 {
            samples.push(sample(
                i * 100,
                vec![(2, 20, i * 1000), (2, 20, i * 1000 + 100)],
            ));
        }
        let t = Timeline::build(&events, &samples);
        // Query degradations once and reuse the list for every check.
        let degs = t.degradations();
        assert!(!degs.is_empty(), "DSQ depth jump must be detected");
        // `expect` replaces the `is_some()` + `unwrap()` pair: one lookup,
        // same failure message.
        let (phase, change) = degs
            .iter()
            .find(|(_, c)| c.metric == "dsq_depth")
            .expect("must detect dsq_depth degradation");
        assert_eq!(phase.index, 1);
        assert_eq!(change.direction, ChangeDirection::Degraded);
        let delta = change.after - change.before;
        assert!(
            delta > DSQ_THRESHOLD,
            "dsq delta {:.1} must exceed threshold {:.1}",
            delta,
            DSQ_THRESHOLD
        );
        assert!(
            change.before < 5.0,
            "before dsq should be low: {:.1}",
            change.before
        );
        assert!(
            change.after > 15.0,
            "after dsq should be high: {:.1}",
            change.after
        );

        let formatted = t.format_with_context(&TimelineContext::default());
        assert!(
            formatted.contains("dsq_depth"),
            "format must name dsq_depth"
        );
        assert!(
            formatted.contains("DEGRADATION"),
            "format must label degradation"
        );
    }

    /// Identical samples on both sides of a phase boundary must produce no
    /// degradations and no per-phase changes.
    #[test]
    fn neg_timeline_no_degradation_when_stable() {
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(2000, "StepStart[0]")];
        let mut samples = Vec::new();
        // The same two-CPU shape throughout, spanning both phases.
        for i in 6..45 {
            samples.push(sample(
                i * 100,
                vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)],
            ));
        }
        let t = Timeline::build(&events, &samples);
        assert_eq!(t.phases.len(), 2, "must have 2 phases");
        assert!(t.phases[0].metrics.sample_count > 0);
        assert!(t.phases[1].metrics.sample_count > 0);
        // One assertion is enough; the original repeated this check without
        // a message immediately afterwards.
        assert!(
            t.degradations().is_empty(),
            "stable phases must not show degradation"
        );
        // All phase changes should be empty.
        for phase in &t.phases {
            assert!(
                phase.changes.is_empty(),
                "phase {} should have no changes",
                phase.index
            );
        }
    }

    // -- detect_change direct tests --

    /// Higher-is-worse metric rising past the threshold: the change is
    /// `Degraded` and echoes the metric name and both endpoint values.
    #[test]
    fn detect_change_higher_is_worse_positive_delta_degraded() {
        let change = detect_change(1.0, 5.0, 0.5, "imbalance", true).unwrap();
        assert_eq!(change.metric, "imbalance");
        assert_eq!(change.direction, ChangeDirection::Degraded);
        assert!((change.after - 5.0).abs() < f64::EPSILON);
        assert!((change.before - 1.0).abs() < f64::EPSILON);
    }

    /// Higher-is-worse metric falling past the threshold is an improvement.
    #[test]
    fn detect_change_higher_is_worse_negative_delta_improved() {
        let change = detect_change(5.0, 1.0, 0.5, "imbalance", true).unwrap();
        let direction = change.direction;
        assert_eq!(direction, ChangeDirection::Improved);
    }

    /// Lower-is-worse metric falling past the threshold is a degradation.
    #[test]
    fn detect_change_lower_is_worse_negative_delta_degraded() {
        let change = detect_change(100.0, 50.0, 10.0, "throughput", false).unwrap();
        let direction = change.direction;
        assert_eq!(direction, ChangeDirection::Degraded);
    }

    /// Lower-is-worse metric rising past the threshold is an improvement.
    #[test]
    fn detect_change_lower_is_worse_positive_delta_improved() {
        let change = detect_change(50.0, 100.0, 10.0, "throughput", false).unwrap();
        let direction = change.direction;
        assert_eq!(direction, ChangeDirection::Improved);
    }

    /// A delta of 0.3 against a threshold of 0.5 is too small to report.
    #[test]
    fn detect_change_below_threshold_returns_none() {
        let outcome = detect_change(1.0, 1.3, 0.5, "imbalance", true);
        assert!(outcome.is_none());
    }

    /// A delta exactly equal to the threshold (0.5) is not reported: the
    /// test pins the comparison as strictly-greater-than.
    #[test]
    fn detect_change_exactly_at_threshold_returns_none() {
        let outcome = detect_change(1.0, 1.5, 0.5, "imbalance", true);
        assert!(outcome.is_none());
    }

    // -- iteration_rate computation tests --

    /// Test fixture: a `StimulusEvent` at `elapsed_ms` with the given label
    /// and a known cumulative `total_iterations`. Every other optional field
    /// is `None` and both `is_terminal` and `is_step_end` are `false`.
    fn stimulus_with_iters(elapsed_ms: u64, label: &str, total_iterations: u64) -> StimulusEvent {
        let label = label.to_owned();
        let total_iterations = Some(total_iterations);
        StimulusEvent {
            elapsed_ms,
            label,
            total_iterations,
            op_kind: None,
            detail: None,
            step_index: None,
            is_terminal: false,
            is_step_end: false,
        }
    }

    /// Two consecutive events that both carry `total_iterations` must give
    /// the first phase an iteration rate in the expected ballpark.
    #[test]
    fn iteration_rate_computed_from_consecutive_events() {
        // Two events with total_iterations: phase 0 spans 0..3000ms
        // (aligned). iterations: 0 -> 3000 over ~3s = 1000 iter/s.
        let events = vec![
            stimulus_with_iters(0, "ScenarioStart", 0),
            stimulus_with_iters(3000, "StepStart[0]", 3000),
        ];
        let samples: Vec<MonitorSample> = (5..35)
            .map(|i| sample(i * 100, vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)]))
            .collect();
        let t = Timeline::build(&events, &samples);
        assert_eq!(t.phases.len(), 2);
        // `expect` replaces the `is_some()` + `unwrap()` pair and keeps the
        // same failure message.
        let r = t.phases[0]
            .metrics
            .iteration_rate
            .expect("phase 0 should have iteration_rate");
        // Duration is phase boundary difference, not exactly 3s due to
        // clock alignment offset. Check that the rate is reasonable.
        assert!(r > 500.0 && r < 2000.0, "rate {r} outside expected range");
    }

    /// Events built without `total_iterations` give no phase an iteration
    /// rate.
    #[test]
    fn iteration_rate_none_without_total_iterations() {
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..35 {
            samples.push(sample(
                tick * 100,
                vec![(2, 1, tick * 1000), (2, 1, tick * 1000 + 100)],
            ));
        }
        // Plain `stimulus` fixtures: no iteration counts attached.
        let events = vec![stimulus(0, "ScenarioStart"), stimulus(3000, "StepStart[0]")];
        let timeline = Timeline::build(&events, &samples);
        let first_rate = timeline.phases[0].metrics.iteration_rate;
        let second_rate = timeline.phases[1].metrics.iteration_rate;
        assert!(first_rate.is_none());
        assert!(second_rate.is_none());
    }

    /// Wire-format `StimulusEvent` fixture. Lets tests exercise the
    /// production `from_wire` conversion end to end instead of building the
    /// timeline event by hand, which used to bypass the
    /// `total_iterations == 0` sentinel handling.
    ///
    /// Only `elapsed_ms`, `step_index` and `total_iterations` vary; the op
    /// and cgroup counters are zero and `worker_count` is 1.
    fn wire_event(
        elapsed_ms: u32,
        step_index: u16,
        total_iterations: u64,
    ) -> crate::vmm::wire::StimulusEvent {
        use crate::vmm::wire::StimulusEvent as WireStimulus;

        WireStimulus {
            elapsed_ms,
            step_index,
            total_iterations,
            worker_count: 1,
            op_count: 0,
            op_kinds: 0,
            cgroup_count: 0,
        }
    }

    /// `total_iterations` is a cumulative counter, so a start-of-window 0 is
    /// a legitimate baseline rather than a missing sample: `from_wire` must
    /// carry `Some(0)` and never collapse it to `None`.
    #[test]
    fn from_wire_zero_iterations_is_some_baseline() {
        let wire = wire_event(0, 1, 0);
        let converted = StimulusEvent::from_wire(&wire);
        assert_eq!(converted.total_iterations, Some(0));
        assert_eq!(converted.step_index, Some(1));
        assert!(!converted.is_terminal);
        assert!(
            !converted.is_step_end,
            "a StepStart-derived event is not a StepEnd"
        );
    }

    /// A StepEnd frame reuses the StimulusEvent wire body. `from_step_end`
    /// must keep the same 1-indexed `step_index` and the step's end-of-hold
    /// `total_iterations`, set `is_step_end`, and leave `is_terminal` off
    /// (it is a real per-step boundary, not the scenario terminal).
    #[test]
    fn from_step_end_carries_step_index_and_marks_step_end() {
        let wire = wire_event(1_900, 1, 9_000);
        let converted = StimulusEvent::from_step_end(&wire);
        assert_eq!(converted.step_index, Some(1));
        assert_eq!(converted.total_iterations, Some(9_000));
        assert!(
            converted.is_step_end,
            "StepEnd-derived event must set is_step_end"
        );
        assert!(
            !converted.is_terminal,
            "StepEnd is a per-step boundary, not the scenario terminal",
        );
    }

    /// First-step zero-baseline regression, driven through the full
    /// `from_wire` path. Earlier unit tests injected `Some(0)` directly,
    /// which masked the wire 0 -> `None` collapse.
    #[test]
    fn from_wire_first_step_zero_baseline_yields_rate() {
        // The first step frame reads 0 cumulative iterations, the second
        // reads 3000; the first phase must get a rate, not a silent None.
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(0, 1, 0)),
            StimulusEvent::from_wire(&wire_event(3000, 2, 3000)),
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..35 {
            samples.push(sample(
                tick * 100,
                vec![(2, 1, tick * 1000), (2, 1, tick * 1000 + 100)],
            ));
        }
        let timeline = Timeline::build(&events, &samples);
        let first_rate = timeline.phases[0].metrics.iteration_rate;
        assert!(
            first_rate.is_some(),
            "first phase must get a rate from the 0 baseline",
        );
    }

    /// The last step has no successor step event, so its iteration rate
    /// needs the terminal scenario-end boundary. The terminal must supply
    /// that boundary without adding a phantom trailing phase.
    #[test]
    fn terminal_event_gives_last_step_rate_without_phantom_phase() {
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(0, 1, 0)),
            StimulusEvent::from_wire(&wire_event(2000, 2, 4000)),
            // Scenario end at 4000 ms with 10000 cumulative iterations.
            StimulusEvent::terminal(4000, 10000),
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..45 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(
            timeline.phases.len(),
            2,
            "two step events -> two phases; terminal seeds none",
        );
        let last_rate = timeline.phases[1].metrics.iteration_rate;
        assert!(
            last_rate.is_some(),
            "last step must get a rate from the terminal boundary",
        );
    }

    /// A StepEnd is an end-of-hold marker, not a step boundary, so it must be
    /// filtered from the phase-layout set: it neither adds a phantom phase
    /// nor misaligns the dense phase index. (StepEnd is still consumed for
    /// the step-local rate — see
    /// build_pairs_step_local_when_step_end_events_present.)
    #[test]
    fn build_filters_step_end_events_no_phantom_phase() {
        let step_start_0 = StimulusEvent::from_wire(&wire_event(0, 1, 0));
        let step_end_0 = StimulusEvent::from_step_end(&wire_event(1_900, 1, 9_000));
        let step_start_1 = StimulusEvent::from_wire(&wire_event(2_000, 2, 9_000));
        // Two StepStart events with one StepEnd interleaved between them.
        let events: Vec<StimulusEvent> = vec![step_start_0, step_end_0, step_start_1];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..35 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(
            timeline.phases.len(),
            2,
            "two StepStart events -> two phases; the interleaved StepEnd seeds none",
        );
    }

    /// The monitor-only `Timeline::build` fallback must also use step-local
    /// StepStart[k] -> StepEnd[k] pairing when StepEnd events are present
    /// (they are emitted independent of snapshot captures), not the
    /// cross-step StepStart[k] -> StepStart[k+1] pairing, which reads 0 -> 0
    /// for workers respawned per step.
    #[test]
    fn build_pairs_step_local_when_step_end_events_present() {
        // Two fresh-per-step steps: each StepStart reads ~0. Without
        // step-local pairing phase 0 would be None (0 -> 0 cross-step);
        // with it, both phases get a rate.
        let step_start_0 = StimulusEvent::from_wire(&wire_event(0, 1, 0)); // iters 0
        let step_end_0 = StimulusEvent::from_step_end(&wire_event(1_000, 1, 5_000)); // iters 5000
        let step_start_1 = StimulusEvent::from_wire(&wire_event(1_100, 2, 0)); // respawned, iters 0
        let step_end_1 = StimulusEvent::from_step_end(&wire_event(2_100, 2, 3_000)); // iters 3000
        let events: Vec<StimulusEvent> = vec![step_start_0, step_end_0, step_start_1, step_end_1];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 1..30 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(
            timeline.phases.len(),
            2,
            "two StepStart events -> two phases (each StepEnd seeds none)",
        );
        let first_rate = timeline.phases[0].metrics.iteration_rate;
        let second_rate = timeline.phases[1].metrics.iteration_rate;
        assert!(
            first_rate.is_some(),
            "phase 0 must get a step-local rate from StepStart[0] -> StepEnd[0], \
             not the cross-step 0 -> 0 None (the old cross-step fallback bug)",
        );
        assert!(
            second_rate.is_some(),
            "phase 1 (respawned workers) must get its own step-local rate",
        );
    }

    /// Monitor-only path: a step that has a StepEnd but stalled
    /// (StepEnd[k] == StepStart[k]) reports its measured-zero step-local rate
    /// (`Some(0.0)`). Its StepEnd lookup hits, so the cross-step fallback
    /// must not run. Mirrors the snapshot path's
    /// build_phase_buckets_with_stimulus_stalled_step_reports_measured_zero.
    #[test]
    fn build_stalled_step_with_step_end_reports_measured_zero_not_cross_step() {
        // Step 0 stalls (0 -> 0). A persistent population reads 500 at
        // StepStart[1], so a cross-step StepStart[0] -> StepStart[1] leak
        // would be ~454/s. Step 1 advances 500 -> 5500 (5000/s).
        let step_start_0 = StimulusEvent::from_wire(&wire_event(0, 1, 0)); // iters 0
        let step_end_0 = StimulusEvent::from_step_end(&wire_event(1_000, 1, 0)); // STALLED 0
        let step_start_1 = StimulusEvent::from_wire(&wire_event(1_100, 2, 500)); // persistent 500
        let step_end_1 = StimulusEvent::from_step_end(&wire_event(2_100, 2, 5_500)); // iters 5500
        let events: Vec<StimulusEvent> = vec![step_start_0, step_end_0, step_start_1, step_end_1];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 1..30 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(timeline.phases.len(), 2);
        let stalled_rate = timeline.phases[0].metrics.iteration_rate;
        let healthy_rate = timeline.phases[1].metrics.iteration_rate;
        assert_eq!(
            stalled_rate,
            Some(0.0),
            "a stalled step reports measured-zero throughput, not the \
             cross-step StepStart[0] -> StepStart[1] persistent-leak rate",
        );
        assert!(
            healthy_rate.is_some(),
            "step 1 still reports its own step-local rate",
        );
    }

    /// Boundary case: a one-step scenario (first == last). With the 0
    /// baseline and the terminal boundary, the single step still gets a
    /// rate, and the terminal adds no phase.
    #[test]
    fn terminal_event_single_step_rate() {
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(0, 1, 0)),
            StimulusEvent::terminal(3000, 9000),
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..35 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(
            timeline.phases.len(),
            1,
            "single step -> one phase; terminal adds none"
        );
        let only_rate = timeline.phases[0].metrics.iteration_rate;
        assert!(
            only_rate.is_some(),
            "single step gets a rate (first == last)",
        );
    }

    /// Boundary case: the last step's counter did not advance (terminal
    /// count == last step-start count, e == s). That is measured-zero
    /// throughput — a real value and the strongest degradation signal, not
    /// "unmeasured" — so the rate is `Some(0.0)` and the zero reaches the
    /// degradation detector. Only a counter decrease (e < s) is
    /// unmeasurable and yields `None`.
    #[test]
    fn terminal_event_stalled_last_step_reports_measured_zero() {
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(0, 1, 0)),
            StimulusEvent::from_wire(&wire_event(2000, 2, 4000)),
            StimulusEvent::terminal(4000, 4000), // no advance
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..45 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        assert_eq!(timeline.phases.len(), 2);
        let last_rate = timeline.phases[1].metrics.iteration_rate;
        assert_eq!(
            last_rate,
            Some(0.0),
            "stalled last step (e == s) reports measured-zero, not None",
        );
    }

    /// A counter decrease between consecutive step frames (e.g. a step-local
    /// worker population reset) is unmeasurable and must not produce a
    /// negative or conflated rate: the `e < s` guard drops the pair and the
    /// rate is `None` (distinct from `e == s`, a measured-zero `Some(0.0)`).
    /// Pinned so that loosening the guard to allow a negative delta fails
    /// here.
    #[test]
    fn iteration_rate_counter_decrease_yields_no_rate() {
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(0, 1, 0)),
            StimulusEvent::from_wire(&wire_event(2000, 2, 5000)),
            // Counter dropped 5000 -> 1000.
            StimulusEvent::from_wire(&wire_event(3000, 3, 1000)),
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..35 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        // Phase 1 is step 2 (frame iters 5000 -> next 1000): a decrease.
        let decreased_rate = timeline.phases[1].metrics.iteration_rate;
        assert!(
            decreased_rate.is_none(),
            "a counter decrease must not manufacture a (negative) rate",
        );
    }

    /// Two consecutive frames with identical `elapsed_ms` make the rate
    /// denominator 0; the duration==0 guard must drop the pair rather than
    /// divide and produce inf/NaN.
    #[test]
    fn iteration_rate_zero_duration_yields_no_rate() {
        // Both frames sit at 1000 ms.
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(1000, 1, 0)),
            StimulusEvent::from_wire(&wire_event(1000, 2, 2000)),
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..35 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        let first_rate = timeline.phases[0].metrics.iteration_rate;
        assert!(
            first_rate.is_none(),
            "zero-duration pair must not divide; rate stays None",
        );
    }

    /// Robustness: even if a corrupt/out-of-order `elapsed_ms` made the
    /// terminal sort before a real step, the explicit `is_terminal`
    /// extraction (not positional) must keep the step phases aligned and
    /// attribute the early step's rate correctly. A corrupt terminal
    /// contributes no spurious rate, because its position cannot shift the
    /// dense phase index.
    #[test]
    fn terminal_not_last_does_not_misalign_or_misattribute() {
        let events: Vec<StimulusEvent> = vec![
            StimulusEvent::from_wire(&wire_event(0, 1, 0)),
            StimulusEvent::from_wire(&wire_event(2000, 2, 4000)),
            // Terminal with elapsed_ms BEFORE step 2 (simulated corruption).
            StimulusEvent::terminal(500, 9000),
        ];
        let mut samples: Vec<MonitorSample> = Vec::new();
        for tick in 5..45 {
            samples.push(sample(tick * 100, vec![(2, 1, tick * 1000)]));
        }
        let timeline = Timeline::build(&events, &samples);
        // Two step events -> two phases regardless of terminal position.
        assert_eq!(
            timeline.phases.len(),
            2,
            "terminal position must not change phase count"
        );
        // Phase 0 (step 1) still gets its correct rate (0 -> 4000 over
        // 2s = 2000/s): the misordered terminal did not misalign it.
        let early_rate = timeline.phases[0].metrics.iteration_rate;
        assert_eq!(
            early_rate,
            Some(2000.0),
            "early step rate must be correct despite a misordered terminal",
        );
    }

    /// A sharp drop in iteration rate between two phases must be reported as
    /// a "throughput" degradation on the slower phase.
    #[test]
    fn throughput_degradation_detected() {
        // Phase 0: high throughput (0 -> 10000 iters over ~2s = ~5000/s)
        // Phase 1: low throughput (10000 -> 11000 iters over ~2s = ~500/s)
        // 90% drop exceeds ITERATION_RATE_REL_THRESHOLD (0.3).
        //
        // The third event is only *labelled* "StepEnd[0]": the
        // `stimulus_with_iters` fixture leaves `is_step_end` false, so it
        // acts as an ordinary phase boundary, which is why three phases are
        // expected below.
        let events = vec![
            stimulus_with_iters(0, "ScenarioStart", 0),
            stimulus_with_iters(2000, "StepStart[0]", 10000),
            stimulus_with_iters(4000, "StepEnd[0]", 11000),
        ];
        let samples: Vec<MonitorSample> = (5..45)
            .map(|i| sample(i * 100, vec![(2, 1, i * 1000), (2, 1, i * 1000 + 100)]))
            .collect();
        let t = Timeline::build(&events, &samples);
        assert_eq!(t.phases.len(), 3);
        // Both phases must report a rate; `expect` replaces the separate
        // `is_some()` assertion followed by a bare `unwrap()`.
        let r0 = t.phases[0]
            .metrics
            .iteration_rate
            .expect("phase 0 must report an iteration_rate");
        let r1 = t.phases[1]
            .metrics
            .iteration_rate
            .expect("phase 1 must report an iteration_rate");
        assert!(
            r0 > r1,
            "phase 0 rate ({r0}) should exceed phase 1 rate ({r1})"
        );

        // Throughput degradation should be detected at phase 1 boundary.
        let degs: Vec<_> = t
            .degradations()
            .into_iter()
            .filter(|(_, c)| c.metric == "throughput")
            .collect();
        assert!(!degs.is_empty(), "throughput degradation must be detected");
        // Indexing is safe: non-emptiness was asserted on the line above.
        let (phase, change) = &degs[0];
        assert_eq!(phase.index, 1);
        assert_eq!(change.direction, ChangeDirection::Degraded);
        assert!(change.before > change.after);
    }

    #[test]
    fn throughput_collapse_to_zero_is_flagged() {
        // Regression guard. A phase whose iteration counter does not move
        // at all (e == s, a measured zero) is the strongest degradation
        // signal there is and must be flagged. Historically the zero
        // phase's rate_to returned None, the detector's Some/Some gate
        // dropped it, and the worst degradation went unreported.
        let stimuli = vec![
            stimulus_with_iters(0, "ScenarioStart", 0),
            // Phase 0 runs at roughly 5000 iterations/s.
            stimulus_with_iters(2000, "StepStart[0]", 10000),
            // Phase 1 is stalled: the counter stays at 10000 (0/s).
            stimulus_with_iters(4000, "StepStart[1]", 10000),
        ];
        let monitor: Vec<MonitorSample> = (5..45)
            .map(|tick| sample(tick * 100, vec![(2, 1, tick * 1000)]))
            .collect();
        let timeline = Timeline::build(&stimuli, &monitor);

        let stalled_rate = timeline.phases[1].metrics.iteration_rate;
        assert_eq!(
            stalled_rate,
            Some(0.0),
            "the collapsed phase must report measured-zero throughput",
        );

        // Only throughput changes attached to phase index 1 are relevant.
        let collapse = timeline
            .degradations()
            .into_iter()
            .filter(|(phase, change)| phase.index == 1 && change.metric == "throughput")
            .collect::<Vec<_>>();
        assert!(
            !collapse.is_empty(),
            "a collapse to zero throughput must be flagged as a degradation",
        );
        let (_, change) = &collapse[0];
        assert_eq!(change.direction, ChangeDirection::Degraded);
        assert_eq!(change.after, 0.0);
    }

    #[test]
    fn throughput_improvement_detected() {
        // Scenario shape:
        //   phase 0 covers 0 -> 500 iterations in ~2s (~250/s),
        //   phase 1 covers 500 -> 10500 iterations in ~2s (~5000/s).
        // An increase of more than 30% has to be flagged as an improvement.
        let stimuli = vec![
            stimulus_with_iters(0, "ScenarioStart", 0),
            stimulus_with_iters(2000, "StepStart[0]", 500),
            stimulus_with_iters(4000, "StepEnd[0]", 10500),
        ];
        let monitor: Vec<MonitorSample> = (5..45)
            .map(|tick| {
                let base = tick * 1000;
                sample(tick * 100, vec![(2, 1, base), (2, 1, base + 100)])
            })
            .collect();
        let timeline = Timeline::build(&stimuli, &monitor);

        // Scan every change on every phase for an improved throughput entry.
        let saw_improvement = timeline
            .phases
            .iter()
            .flat_map(|phase| phase.changes.iter())
            .any(|change| {
                change.metric == "throughput" && change.direction == ChangeDirection::Improved
            });
        assert!(saw_improvement, "throughput improvement must be detected");
    }

    #[test]
    fn throughput_stable_below_threshold() {
        // Scenario shape:
        //   phase 0 runs at 1000 iter/s,
        //   phase 1 runs at ~900 iter/s.
        // A 10% drop sits under the 30% threshold, so no throughput change
        // may be recorded anywhere.
        let stimuli = vec![
            stimulus_with_iters(0, "ScenarioStart", 0),
            stimulus_with_iters(2000, "StepStart[0]", 2000),
            stimulus_with_iters(4000, "StepEnd[0]", 3800),
        ];
        let monitor: Vec<MonitorSample> = (5..45)
            .map(|tick| {
                let base = tick * 1000;
                sample(tick * 100, vec![(2, 1, base), (2, 1, base + 100)])
            })
            .collect();
        let timeline = Timeline::build(&stimuli, &monitor);

        let any_throughput_change = timeline
            .phases
            .iter()
            .flat_map(|phase| phase.changes.iter())
            .any(|change| change.metric == "throughput");
        assert!(
            !any_throughput_change,
            "10% change should not trigger throughput change detection"
        );
    }

    #[test]
    fn from_phase_buckets_maps_known_metrics_and_renders_phase_block() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        // BASELINE carries no metrics; Step[0] carries every metric key
        // this test expects to be projected onto the phase.
        let baseline = PhaseBucket {
            step_index: 0,
            label: "BASELINE".to_string(),
            start_ms: 0,
            end_ms: 1000,
            sample_count: 5,
            metrics: BTreeMap::new(),
        };
        let step0 = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 1000,
            end_ms: 6000,
            sample_count: 20,
            metrics: BTreeMap::from([
                ("max_dsq_depth".to_string(), 7.0),
                ("avg_dsq_depth".to_string(), 2.5),
                ("max_imbalance_ratio".to_string(), 3.5),
                ("avg_imbalance_ratio".to_string(), 1.8),
                ("total_fallback".to_string(), 200.0),
            ]),
        };
        let timeline =
            Timeline::from_phase_buckets(&[baseline, step0], &[], &TimelineContext::default());
        assert_eq!(timeline.phases.len(), 2);

        // BASELINE phase: no stimulus attached, metrics left at defaults.
        let base_phase = &timeline.phases[0];
        assert!(base_phase.stimulus.is_none());
        assert_eq!(base_phase.metrics.sample_count, 5);
        assert_eq!(base_phase.metrics.max_dsq_depth, 0);

        // Step[0] phase: a stimulus is present and the metrics mirror the
        // values supplied in the bucket map.
        let step_phase = &timeline.phases[1];
        assert!(step_phase.stimulus.is_some());
        assert_eq!(step_phase.stimulus.as_ref().unwrap().label, "Step[0]");
        let projected = &step_phase.metrics;
        assert_eq!(projected.sample_count, 20);
        assert_eq!(projected.max_dsq_depth, 7);
        assert!((projected.avg_dsq_depth.unwrap() - 2.5).abs() < f64::EPSILON);
        assert!((projected.max_imbalance.unwrap() - 3.5).abs() < f64::EPSILON);
        assert!((projected.avg_imbalance.unwrap() - 1.8).abs() < f64::EPSILON);
        // 200 fallback events over a 5000 ms window: 200 / 5.0 = 40.0 events/s.
        assert_eq!(projected.fallback_rate, Some(40.0));
        // The map has no total_keep_last entry, so the rate stays None.
        assert_eq!(projected.keep_last_rate, None);
        // avg_dsq_depth and avg_imbalance are both wired (per the doc
        // table). iteration_rate is the one field a PhaseBucket cannot
        // supply directly: it depends on stimulus event totals rather
        // than on a per-sample reading.
        assert_eq!(projected.iteration_rate, None);

        // Rendering yields a timeline block that names both phases.
        let rendered = timeline.format_with_context(&TimelineContext::default());
        assert!(rendered.contains("--- timeline ---"));
        assert!(rendered.contains("BASELINE"));
        assert!(rendered.contains("Step[0]"));
    }

    /// Exercises boundary change-detection through `from_phase_buckets`,
    /// the PRODUCTION success path (`evaluate_vm_result` prefers
    /// from_phase_buckets over `build`). Two neighbouring buckets that
    /// both carry metrics, with avg_imbalance / avg_dsq_depth worsening
    /// past their thresholds, must produce Degraded changes on the
    /// ENTERED phase (phases[1]) and none on the BASELINE phase.
    /// This is the test that actually drives the boundary detection
    /// loop: a wrong threshold, an inverted direction, recording on the
    /// wrong phase, or reading the wrong metric field would otherwise
    /// pass unnoticed, because the remaining from_phase_buckets tests
    /// never trigger that loop.
    #[test]
    fn from_phase_buckets_detects_boundary_degradation() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        let baseline = PhaseBucket {
            step_index: 0,
            label: "BASELINE".to_string(),
            start_ms: 0,
            end_ms: 1000,
            sample_count: 5,
            metrics: BTreeMap::from([
                ("avg_imbalance_ratio".to_string(), 1.0),
                ("avg_dsq_depth".to_string(), 1.0),
            ]),
        };
        let entered = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 1000,
            end_ms: 6000,
            sample_count: 20,
            metrics: BTreeMap::from([
                // +1.0 exceeds the 0.5 threshold.
                ("avg_imbalance_ratio".to_string(), 2.0),
                // +5.0 exceeds the 3.0 threshold.
                ("avg_dsq_depth".to_string(), 6.0),
            ]),
        };
        let timeline =
            Timeline::from_phase_buckets(&[baseline, entered], &[], &TimelineContext::default());

        // Changes land on the phase being entered, never on the one before.
        assert!(
            timeline.phases[0].changes.is_empty(),
            "BASELINE has no prior phase to diff; changes belong to the entered phase",
        );

        let recorded = &timeline.phases[1].changes;

        let imbalance = recorded
            .iter()
            .find(|change| change.metric == "imbalance")
            .expect("imbalance change must fire (1.0 -> 2.0 crosses 0.5)");
        assert_eq!(imbalance.direction, ChangeDirection::Degraded);
        assert!((imbalance.before - 1.0).abs() < f64::EPSILON);
        assert!((imbalance.after - 2.0).abs() < f64::EPSILON);

        let depth = recorded
            .iter()
            .find(|change| change.metric == "dsq_depth")
            .expect("dsq_depth change must fire (1.0 -> 6.0 crosses 3.0)");
        assert_eq!(depth.direction, ChangeDirection::Degraded);
        assert!((depth.before - 1.0).abs() < f64::EPSILON);
        assert!((depth.after - 6.0).abs() < f64::EPSILON);
    }

    /// Deltas that stay under the thresholds must record NO change. This
    /// protects against a dropped or zeroed threshold, which would invent
    /// spurious boundary changes.
    #[test]
    fn from_phase_buckets_subthreshold_records_no_change() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        let baseline = PhaseBucket {
            step_index: 0,
            label: "BASELINE".to_string(),
            start_ms: 0,
            end_ms: 1000,
            sample_count: 5,
            metrics: BTreeMap::from([
                ("avg_imbalance_ratio".to_string(), 1.0),
                ("avg_dsq_depth".to_string(), 1.0),
            ]),
        };
        let entered = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 1000,
            end_ms: 6000,
            sample_count: 20,
            metrics: BTreeMap::from([
                // +0.2 is below 0.5.
                ("avg_imbalance_ratio".to_string(), 1.2),
                // +1.0 is below 3.0.
                ("avg_dsq_depth".to_string(), 2.0),
            ]),
        };
        let timeline =
            Timeline::from_phase_buckets(&[baseline, entered], &[], &TimelineContext::default());

        let recorded = &timeline.phases[1].changes;
        assert!(
            recorded.is_empty(),
            "sub-threshold deltas must not record a boundary change",
        );
    }

    /// An imbalance that falls across the boundary must be recorded as an
    /// IMPROVEMENT. This pins the higher_is_worse direction, so an
    /// inverted flag cannot turn a regression into an improvement or the
    /// other way round.
    #[test]
    fn from_phase_buckets_detects_boundary_improvement() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        let baseline = PhaseBucket {
            step_index: 0,
            label: "BASELINE".to_string(),
            start_ms: 0,
            end_ms: 1000,
            sample_count: 5,
            metrics: BTreeMap::from([("avg_imbalance_ratio".to_string(), 2.0)]),
        };
        let entered = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 1000,
            end_ms: 6000,
            sample_count: 20,
            // Delta of -1.0: |delta| > 0.5 and after < before.
            metrics: BTreeMap::from([("avg_imbalance_ratio".to_string(), 1.0)]),
        };
        let timeline =
            Timeline::from_phase_buckets(&[baseline, entered], &[], &TimelineContext::default());

        let recorded = &timeline.phases[1].changes;
        let imbalance = recorded
            .iter()
            .find(|change| change.metric == "imbalance")
            .expect("imbalance change must fire (2.0 -> 1.0 crosses 0.5)");
        assert_eq!(
            imbalance.direction,
            ChangeDirection::Improved,
            "a decreasing imbalance is an improvement, not a degradation",
        );
    }

    /// `from_phase_buckets` has to CORRELATE a real stimulus event into
    /// the phase header and carry over its op_kind + detail. All other
    /// from_phase_buckets tests hand in `&[]`, which only exercises the
    /// synthetic None-placeholder arm; the `Some(ev) => (*ev).clone()`
    /// correlation arm (introduced so headers stop degrading to
    /// "Step[N]: ?") would otherwise go untested. With a wrong interval
    /// bound, or the wrong event cloned, the operator-facing op/detail
    /// would disappear without any failure.
    #[test]
    fn from_phase_buckets_correlates_real_stimulus_op_and_detail() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        // The event sits exactly at the start of the Step[0] window.
        let cpuset_event = StimulusEvent {
            elapsed_ms: 1000,
            label: "Step[0]".to_string(),
            op_kind: Some("SetCpuset".to_string()),
            detail: Some("4 cpus".to_string()),
            total_iterations: None,
            step_index: Some(1),
            is_terminal: false,
            is_step_end: false,
        };
        let baseline = PhaseBucket {
            step_index: 0,
            label: "BASELINE".to_string(),
            start_ms: 0,
            end_ms: 1000,
            sample_count: 5,
            metrics: BTreeMap::new(),
        };
        let step0 = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 1000,
            end_ms: 6000,
            sample_count: 20,
            metrics: BTreeMap::new(),
        };
        let timeline = Timeline::from_phase_buckets(
            &[baseline, step0],
            &[cpuset_event],
            &TimelineContext::default(),
        );

        let correlated = timeline.phases[1]
            .stimulus
            .as_ref()
            .expect("Step[0] phase carries a stimulus");
        assert_eq!(
            correlated.op_kind.as_deref(),
            Some("SetCpuset"),
            "the correlated event's op_kind must be carried, not the None placeholder",
        );
        assert_eq!(correlated.detail.as_deref(), Some("4 cpus"));
    }

    #[test]
    fn from_phase_buckets_zero_duration_window_emits_no_rate() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        // A window whose start and end coincide has duration_s == 0.
        let degenerate = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 500,
            end_ms: 500,
            sample_count: 1,
            metrics: BTreeMap::from([("total_fallback".to_string(), 100.0)]),
        };
        let timeline =
            Timeline::from_phase_buckets(&[degenerate], &[], &TimelineContext::default());

        // With a zero-length window the rate must remain None instead of
        // turning into a spurious infinity.
        let only_phase = &timeline.phases[0];
        assert_eq!(only_phase.metrics.fallback_rate, None);
    }

    #[test]
    fn from_phase_buckets_absent_imbalance_metric_is_none_not_zero() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        // The bucket has no avg_imbalance_ratio / avg_dsq_depth entry.
        // Missing data must surface as None, NOT as Some(0.0), so that the
        // change detector skips the phase rather than diffing against a
        // fabricated zero imbalance.
        let empty_metrics = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 100,
            end_ms: 600,
            sample_count: 3,
            metrics: BTreeMap::new(),
        };
        let timeline =
            Timeline::from_phase_buckets(&[empty_metrics], &[], &TimelineContext::default());

        let observed = &timeline.phases[0].metrics;
        assert_eq!(observed.avg_imbalance, None);
        assert_eq!(observed.max_imbalance, None);
        assert_eq!(observed.avg_dsq_depth, None);
    }

    #[test]
    fn from_phase_buckets_sorts_by_step_index() {
        use crate::assert::PhaseBucket;
        use std::collections::BTreeMap;

        // The buckets are deliberately handed over out of order
        // (Step[1], BASELINE, Step[0]). from_phase_buckets has to order
        // them by step_index so the rendered phase block reads
        // BASELINE → Step[0] → Step[1] in time order, whatever order
        // the caller used.
        let step1 = PhaseBucket {
            step_index: 2,
            label: "Step[1]".to_string(),
            start_ms: 2000,
            end_ms: 3000,
            sample_count: 5,
            metrics: BTreeMap::new(),
        };
        let baseline = PhaseBucket {
            step_index: 0,
            label: "BASELINE".to_string(),
            start_ms: 0,
            end_ms: 500,
            sample_count: 2,
            metrics: BTreeMap::new(),
        };
        let step0 = PhaseBucket {
            step_index: 1,
            label: "Step[0]".to_string(),
            start_ms: 500,
            end_ms: 2000,
            sample_count: 5,
            metrics: BTreeMap::new(),
        };
        let shuffled = vec![step1, baseline, step0];
        let timeline = Timeline::from_phase_buckets(&shuffled, &[], &TimelineContext::default());

        assert_eq!(timeline.phases.len(), 3);
        // Start times must come back in ascending order.
        for (phase, expected_start) in timeline.phases.iter().zip([0, 500, 2000]) {
            assert_eq!(phase.start_ms, expected_start);
        }
    }

    #[test]
    fn iteration_rate_in_formatted_output() {
        // Two events with iteration totals, then render the timeline and
        // check that the throughput line and its unit appear.
        let stimuli = vec![
            stimulus_with_iters(0, "ScenarioStart", 0),
            stimulus_with_iters(2000, "StepStart[0]", 5000),
        ];
        let monitor: Vec<MonitorSample> = (5..25)
            .map(|tick| {
                let base = tick * 1000;
                sample(tick * 100, vec![(2, 1, base), (2, 1, base + 100)])
            })
            .collect();
        let timeline = Timeline::build(&stimuli, &monitor);

        let rendered = timeline.format_with_context(&TimelineContext::default());
        assert!(
            rendered.contains("throughput:"),
            "format output must contain throughput when iteration_rate is set"
        );
        assert!(rendered.contains("iter/s"));
    }
}