use super::*;
/// One scalar metric whose A→B change was classified as a regression or an
/// improvement by `push_scalar_findings`.
///
/// Rows that stay inside the significance gates are only counted (see
/// `CompareReport::unchanged`) and never produce a `Finding`.
#[derive(Debug, Clone, serde::Serialize)]
pub(crate) struct Finding {
/// Pairing key shared by the A-side and B-side rows being compared.
pub pairing_key: PairingKey,
/// Scenario name, copied from the B-side row.
pub scenario: String,
/// Topology label, copied from the B-side row.
pub topology: String,
/// Work type, copied from the B-side row.
pub work_type: String,
/// Registry entry describing the metric this finding is about.
pub metric: &'static MetricDef,
/// Metric value read from the A-side row (0.0 when the row has no value).
pub val_a: f64,
/// Metric value read from the B-side row (0.0 when the row has no value).
pub val_b: f64,
/// Signed change, computed as `val_b - val_a`.
pub delta: f64,
/// `true` when the change moves in the metric's "worse" direction.
pub is_regression: bool,
}
/// Aggregate result of one A-vs-B comparison, filled in by `compare_rows_by`.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub(crate) struct CompareReport {
/// Number of scalar findings classified as regressions.
pub regressions: u32,
/// Number of scalar findings classified as improvements.
pub improvements: u32,
/// Number of scalar (row, metric) comparisons that stayed inside the gates.
pub unchanged: u32,
/// Paired rows skipped because either side failed, was inconclusive, or
/// was skipped.
pub excluded_pairs: u32,
/// B-side rows (after the substring filter) with no A-side row of the same key.
pub new_in_b: u32,
/// A-side rows (after the substring filter) with no B-side row of the same key.
pub removed_from_a: u32,
/// Scalar metrics that changed significantly, in discovery order.
pub findings: Vec<Finding>,
/// Per-phase metric deltas for phases present on both sides.
pub phase_deltas: Vec<PhaseDeltaRow>,
/// Phases that exist on only one side of a paired row.
pub unpaired_phases: Vec<UnpairedPhaseRow>,
}
/// Identifies which side of an A/B comparison a value belongs to.
#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Serialize)]
pub(crate) enum ComparePartition {
/// The A side of the comparison.
A,
/// The B side of the comparison.
B,
}
impl ComparePartition {
pub fn as_str(self) -> &'static str {
match self {
Self::A => "A",
Self::B => "B",
}
}
}
/// One metric compared within a single phase that exists on both sides of a
/// paired row. Produced by `push_phase_deltas`.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) struct PhaseDeltaRow {
/// Pairing key of the row pair this phase belongs to.
pub pairing_key: PairingKey,
/// Step index identifying the phase within the run.
pub step_index: u16,
/// Phase label, copied from the A-side phase bucket.
pub label: String,
/// Registry entry describing the compared metric.
pub metric: &'static MetricDef,
/// Metric value on the A side.
pub a: f64,
/// Metric value on the B side.
pub b: f64,
/// Signed change, computed as `b - a`.
pub delta: f64,
/// `true` only when the change clears both significance gates AND moves
/// in the metric's "worse" direction. `false` therefore covers both
/// improvements and changes that stayed below the gates.
pub is_regression: bool,
}
/// A phase that is present on only one side of a paired row, so no delta can
/// be computed for it. Produced by `push_phase_deltas`.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) struct UnpairedPhaseRow {
/// Side on which the phase was observed.
pub side: ComparePartition,
/// Pairing key of the row pair this phase belongs to.
pub pairing_key: PairingKey,
/// Step index identifying the phase within the run.
pub step_index: u16,
/// Phase label, copied from the side that has the phase.
pub label: String,
/// The phase's metrics after `metrics_without_suppressed` filtering,
/// keyed by metric name.
pub metrics: std::collections::BTreeMap<String, f64>,
}
/// Relative-significance thresholds used when comparing metrics.
///
/// Deserialization fills missing fields from `Default` and rejects unknown
/// fields (see the `serde` attributes below).
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct ComparisonPolicy {
/// Threshold, in percent, applied to every metric without a per-metric
/// override. `None` falls back to each metric's own default.
pub default_percent: Option<f64>,
/// Per-metric threshold overrides, in percent, keyed by metric name.
/// `validate` rejects keys that are not registered in `METRICS`.
pub per_metric_percent: BTreeMap<String, f64>,
}
/// Display switches controlling how per-phase comparison output is rendered.
#[derive(Debug, Default, Clone)]
pub struct PhaseDisplayOptions {
/// Suppress the phase block entirely.
pub no_phases: bool,
/// Print only the phase block; the scalar table, summary and host-context
/// sections are skipped.
pub phases_only: bool,
/// Hide the phase with step index 0.
pub steps_only: bool,
/// When set, show only the phase with this step index.
pub phase: Option<u16>,
/// When set, a percentage: it overrides the policy threshold in
/// `rel_threshold` and hides phase delta rows whose relative change is
/// smaller (see `passes_delta_threshold`).
pub phase_threshold: Option<f64>,
}
impl PhaseDisplayOptions {
pub fn rel_threshold(
&self,
policy: &ComparisonPolicy,
metric_name: &str,
default_rel: f64,
) -> f64 {
match self.phase_threshold {
Some(pct) => pct / 100.0,
None => policy.rel_threshold(metric_name, default_rel),
}
}
pub fn matches_phase(&self, step_index: u16) -> bool {
if let Some(want) = self.phase
&& step_index != want
{
return false;
}
if self.steps_only && step_index == 0 {
return false;
}
true
}
pub(crate) fn passes_delta_threshold(&self, delta: &PhaseDeltaRow) -> bool {
let Some(pct) = self.phase_threshold else {
return true;
};
let denom = delta.a.abs().max(1.0);
let rel = delta.delta.abs() / denom;
rel >= pct / 100.0
}
}
impl ComparisonPolicy {
    /// Creates an empty policy: no uniform threshold and no per-metric
    /// overrides, so every metric uses its own default.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a policy that applies `percent` to every metric.
    ///
    /// The value is not validated here; call [`Self::validate`].
    pub fn uniform(percent: f64) -> Self {
        Self {
            default_percent: Some(percent),
            per_metric_percent: BTreeMap::new(),
        }
    }

    /// Reads, parses and validates a policy from the JSON file at `path`.
    ///
    /// # Errors
    /// Fails when the file cannot be read, is not valid policy JSON, or does
    /// not pass [`Self::validate`]. Each failure is annotated with the path.
    pub fn load_json(path: &std::path::Path) -> anyhow::Result<Self> {
        use anyhow::Context;
        let data = std::fs::read_to_string(path)
            .with_context(|| format!("read comparison policy from {}", path.display()))?;
        let policy: ComparisonPolicy = serde_json::from_str(&data)
            .with_context(|| format!("parse comparison policy from {}", path.display()))?;
        policy
            .validate()
            .with_context(|| format!("validate comparison policy from {}", path.display()))?;
        Ok(policy)
    }

    /// Checks that every threshold is usable.
    ///
    /// # Errors
    /// Fails when a threshold is NaN or negative, or when
    /// `per_metric_percent` names a metric that is not in `METRICS`.
    pub fn validate(&self) -> anyhow::Result<()> {
        if let Some(p) = self.default_percent {
            // `NaN < 0.0` is false, so NaN must be rejected explicitly:
            // every `rel_delta < NaN` comparison is false too, which would
            // switch the relative gate off without any diagnostic.
            if p.is_nan() {
                anyhow::bail!(
                    "ComparisonPolicy: default_percent must be a number; got NaN. \
                     A NaN threshold makes every relative-gate comparison false \
                     and silently classifies every delta as significant."
                );
            }
            if p < 0.0 {
                anyhow::bail!(
                    "ComparisonPolicy: default_percent must be non-negative; got {p}. \
                     Thresholds are absolute-value comparisons — a negative value \
                     would invert the dual-gate logic and silently classify every \
                     delta as significant."
                );
            }
        }
        for (name, p) in &self.per_metric_percent {
            if !METRICS.iter().any(|m| m.name == name) {
                let known: Vec<&str> = METRICS.iter().map(|m| m.name).collect();
                anyhow::bail!(
                    "ComparisonPolicy: per_metric_percent contains unknown \
                     metric `{name}`. A typo in the key would silently fall \
                     through to default_percent. Registered metrics: {}",
                    known.join(", "),
                );
            }
            // Same NaN hazard as for `default_percent` above.
            if p.is_nan() {
                anyhow::bail!(
                    "ComparisonPolicy: per_metric_percent[{name:?}] must be \
                     a number; got NaN",
                );
            }
            if *p < 0.0 {
                anyhow::bail!(
                    "ComparisonPolicy: per_metric_percent[{name:?}] must be \
                     non-negative; got {p}",
                );
            }
        }
        Ok(())
    }

    /// Builds a policy from the mutually exclusive `--threshold` and
    /// `--policy` command-line options.
    ///
    /// # Errors
    /// Fails when both options are given, when the uniform threshold is
    /// invalid, or when the policy file cannot be loaded.
    pub fn from_cli_flags(
        threshold: Option<f64>,
        policy: Option<&std::path::Path>,
    ) -> anyhow::Result<Self> {
        match (threshold, policy) {
            (Some(t), None) => {
                let p = Self::uniform(t);
                p.validate()?;
                Ok(p)
            }
            (None, Some(path)) => Self::load_json(path),
            (None, None) => Ok(Self::default()),
            (Some(_), Some(_)) => anyhow::bail!(
                "--threshold and --policy are mutually exclusive; use --policy \
                 for per-metric overrides"
            ),
        }
    }

    /// Returns the relative threshold for `metric_name` as a fraction.
    ///
    /// Precedence: per-metric override, then `default_percent`, then the
    /// caller-supplied `default_rel` (already a fraction, returned as-is).
    pub fn rel_threshold(&self, metric_name: &str, default_rel: f64) -> f64 {
        self.per_metric_percent
            .get(metric_name)
            .copied()
            .or(self.default_percent)
            .map_or(default_rel, |pct| pct / 100.0)
    }
}
/// Pairs A-side and B-side rows by pairing key and builds a [`CompareReport`].
///
/// * `pairing_dims` — dimensions that make up the pairing key.
/// * `filter` — optional substring; a row takes part only when the text
///   `"scenario topology scheduler work_type"` contains it.
/// * `policy` — source of the per-metric relative thresholds.
///
/// When several A-side rows share a key, the first one is used. Pairs where
/// either side failed, was inconclusive, or was skipped are counted in
/// `excluded_pairs` and not compared.
pub(crate) fn compare_rows_by(
    rows_a: &[GauntletRow],
    rows_b: &[GauntletRow],
    pairing_dims: &[Dimension],
    filter: Option<&str>,
    policy: &ComparisonPolicy,
) -> CompareReport {
    // Substring filter shared by the B-side and A-side passes.
    let passes_filter = |row: &GauntletRow| -> bool {
        match filter {
            None => true,
            Some(needle) => format!(
                "{} {} {} {}",
                row.scenario, row.topology, row.scheduler, row.work_type,
            )
            .contains(needle),
        }
    };
    // A row in any of these states carries no comparable measurements.
    let not_comparable =
        |row: &GauntletRow| -> bool { row.is_fail() || row.is_inconclusive() || row.is_skip() };

    let mut baseline: HashMap<PairingKey, &GauntletRow> = HashMap::with_capacity(rows_a.len());
    for row in rows_a {
        baseline
            .entry(PairingKey::from_row(row, pairing_dims))
            .or_insert(row);
    }

    // Resolve per-metric settings once instead of once per row pair.
    let rel_thresholds: Vec<f64> = METRICS
        .iter()
        .map(|m| policy.rel_threshold(m.name, m.default_rel))
        .collect();
    let suppressed: Vec<bool> = METRICS
        .iter()
        .map(|m| is_render_suppressed_component(m.name))
        .collect();

    let mut report = CompareReport::default();
    for candidate in rows_b {
        let key = PairingKey::from_row(candidate, pairing_dims);
        if !passes_filter(candidate) {
            continue;
        }
        match baseline.get(&key).copied() {
            None => report.new_in_b += 1,
            Some(base) if not_comparable(base) || not_comparable(candidate) => {
                report.excluded_pairs += 1;
            }
            Some(base) => {
                push_scalar_findings(
                    &mut report,
                    base,
                    candidate,
                    &key,
                    &rel_thresholds,
                    &suppressed,
                );
                push_phase_deltas(&mut report, base, candidate, &key, policy);
            }
        }
    }

    // Second pass: A-side rows whose key never shows up on the B side.
    let keys_in_b: HashSet<PairingKey> = rows_b
        .iter()
        .map(|row| PairingKey::from_row(row, pairing_dims))
        .collect();
    for row in rows_a {
        let key = PairingKey::from_row(row, pairing_dims);
        if passes_filter(row) && !keys_in_b.contains(&key) {
            report.removed_from_a += 1;
        }
    }
    report
}
/// Compares every registered, non-suppressed metric of one row pair and
/// records the outcome in `report`.
///
/// `rel_thresholds` and `suppressed` are indexed in `METRICS` order. A
/// metric missing on a row is read as 0.0; metrics that are zero on both
/// sides are ignored. A change counts as significant only when it clears
/// both the metric's absolute gate and the relative threshold.
fn push_scalar_findings(
    report: &mut CompareReport,
    row_a: &GauntletRow,
    row_b: &GauntletRow,
    key_b: &PairingKey,
    rel_thresholds: &[f64],
    suppressed: &[bool],
) {
    for (idx, metric) in METRICS.iter().enumerate() {
        if suppressed[idx] {
            continue;
        }
        let before = metric.read(row_a).unwrap_or(0.0);
        let after = metric.read(row_b).unwrap_or(0.0);
        let both_zero = before.abs() < f64::EPSILON && after.abs() < f64::EPSILON;
        if both_zero {
            continue;
        }

        let threshold = rel_thresholds[idx];
        let change = after - before;
        // NOTE(review): with a (near-)zero baseline the relative change is
        // taken as 0.0, so a change away from zero is counted as unchanged
        // for any positive threshold — confirm this is intended.
        let relative_change = if before.abs() > f64::EPSILON {
            (change / before).abs()
        } else {
            0.0
        };

        // Dual gate: failing either the absolute or the relative test
        // means the change is treated as noise.
        let within_noise = change.abs() < metric.default_abs || relative_change < threshold;
        if within_noise {
            report.unchanged += 1;
            continue;
        }

        let worse_when_higher = metric.higher_is_worse();
        let is_regression =
            (worse_when_higher && change > 0.0) || (!worse_when_higher && change < 0.0);
        let counter = if is_regression {
            &mut report.regressions
        } else {
            &mut report.improvements
        };
        *counter += 1;

        report.findings.push(Finding {
            pairing_key: key_b.clone(),
            scenario: row_b.scenario.clone(),
            topology: row_b.topology.clone(),
            work_type: row_b.work_type.clone(),
            metric,
            val_a: before,
            val_b: after,
            delta: change,
            is_regression,
        });
    }
}
fn push_phase_deltas(
report: &mut CompareReport,
row_a: &GauntletRow,
row_b: &GauntletRow,
key_b: &PairingKey,
policy: &ComparisonPolicy,
) {
if !row_a.phases.is_empty() && !row_b.phases.is_empty() {
let a_by_step: std::collections::BTreeMap<u16, &crate::assert::PhaseBucket> =
row_a.phases.iter().map(|p| (p.step_index, p)).collect();
let b_by_step: std::collections::BTreeMap<u16, &crate::assert::PhaseBucket> =
row_b.phases.iter().map(|p| (p.step_index, p)).collect();
let union: std::collections::BTreeSet<u16> =
a_by_step.keys().chain(b_by_step.keys()).copied().collect();
for step_index in union {
match (a_by_step.get(&step_index), b_by_step.get(&step_index)) {
(Some(pa), Some(pb)) => {
for (metric_name, &val_a) in &pa.metrics {
if is_render_suppressed_component(metric_name) {
continue;
}
let Some(&val_b) = pb.metrics.get(metric_name) else {
continue;
};
let Some(metric_def) = metric_def(metric_name) else {
continue;
};
let delta = val_b - val_a;
let rel_thresh =
policy.rel_threshold(metric_def.name, metric_def.default_rel);
let rel_delta = if val_a.abs() > f64::EPSILON {
(delta / val_a).abs()
} else {
0.0
};
let below_dual_gate =
delta.abs() < metric_def.default_abs || rel_delta < rel_thresh;
let is_regression = if below_dual_gate {
false
} else if metric_def.higher_is_worse() {
delta > 0.0
} else {
delta < 0.0
};
report.phase_deltas.push(PhaseDeltaRow {
pairing_key: key_b.clone(),
step_index,
label: pa.label.clone(),
metric: metric_def,
a: val_a,
b: val_b,
delta,
is_regression,
});
}
}
(Some(orphan), None) => {
report.unpaired_phases.push(UnpairedPhaseRow {
side: ComparePartition::A,
pairing_key: key_b.clone(),
step_index,
label: orphan.label.clone(),
metrics: metrics_without_suppressed(&orphan.metrics),
});
}
(None, Some(orphan)) => {
report.unpaired_phases.push(UnpairedPhaseRow {
side: ComparePartition::B,
pairing_key: key_b.clone(),
step_index,
label: orphan.label.clone(),
metrics: metrics_without_suppressed(&orphan.metrics),
});
}
(None, None) => {
unreachable!("step_index taken from union of a_by_step / b_by_step keys")
}
}
}
}
}
/// Prints the dirty-build warning to stderr when `render_dirty_warning`
/// produces one; otherwise prints nothing.
fn warn_on_dirty_builds(rows_a: &[GauntletRow], rows_b: &[GauntletRow]) {
    match render_dirty_warning(rows_a, rows_b) {
        Some(warning) => eprint!("{warning}"),
        None => {}
    }
}
/// Prints the CPU-budget hazard warning to stderr when
/// `render_overcommit_warning` produces one; otherwise prints nothing.
fn warn_on_overcommit(rows_a: &[GauntletRow], rows_b: &[GauntletRow], pairing_dims: &[Dimension]) {
    match render_overcommit_warning(rows_a, rows_b, pairing_dims) {
        Some(warning) => eprint!("{warning}"),
        None => {}
    }
}
/// Builds the CPU-budget hazard warning for a comparison, or `None` when
/// there is nothing to warn about.
///
/// Two hazards are detected per side:
/// * rows whose `cpu_budget` is smaller than their `vcpus`;
/// * rows with different CPU budgets that fall under the same pairing key
///   (only possible when `Dimension::CpuBudget` is not a pairing dimension).
///
/// The headline describes overcommit when any side has it, and mixed
/// budgets otherwise; per-side detail lines follow.
pub(crate) fn render_overcommit_warning(
    rows_a: &[GauntletRow],
    rows_b: &[GauntletRow],
    pairing_dims: &[Dimension],
) -> Option<String> {
    use std::collections::BTreeSet;
    use std::fmt::Write;

    // Distinct (budget, vcpus) pairs where the budget is below the vCPU count.
    let overcommitted = |rows: &[GauntletRow]| -> BTreeSet<(u32, u32)> {
        rows.iter()
            .filter_map(|row| match (row.cpu_budget, row.vcpus) {
                (Some(budget), Some(vcpus)) if budget < vcpus => Some((budget, vcpus)),
                _ => None,
            })
            .collect()
    };

    let cpu_budget_is_pairing = pairing_dims.contains(&Dimension::CpuBudget);
    // Budgets that share a pairing key with at least one different budget.
    let mixed_folded = |rows: &[GauntletRow]| -> BTreeSet<u32> {
        if cpu_budget_is_pairing {
            return BTreeSet::new();
        }
        let mut budgets_by_key: std::collections::HashMap<PairingKey, BTreeSet<u32>> =
            std::collections::HashMap::new();
        for row in rows {
            if let Some(budget) = row.cpu_budget {
                budgets_by_key
                    .entry(PairingKey::from_row(row, pairing_dims))
                    .or_default()
                    .insert(budget);
            }
        }
        budgets_by_key
            .values()
            .filter(|budgets| budgets.len() > 1)
            .flat_map(|budgets| budgets.iter().copied())
            .collect()
    };

    let over_a = overcommitted(rows_a);
    let over_b = overcommitted(rows_b);
    let mixed_a = mixed_folded(rows_a);
    let mixed_b = mixed_folded(rows_b);

    let any_overcommit = !(over_a.is_empty() && over_b.is_empty());
    let any_mixed = !(mixed_a.is_empty() && mixed_b.is_empty());
    if !any_overcommit && !any_mixed {
        return None;
    }

    let headline = if any_overcommit {
        "ktstr: WARNING: CPU-budget hazard in this comparison — a run was \
         host-overcommitted, so its guest-scheduler timing metrics \
         (wake-latency / off-CPU / run-delay) are host-contention-confounded. \
         Compare the overcommit-invariant worst_iterations_per_cpu_sec metric \
         (`stats compare --metric worst_iterations_per_cpu_sec`) instead of raw \
         timing."
    } else {
        "ktstr: WARNING: CPU-budget hazard in this comparison — runs of \
         different CPU budgets share a pairing group, mixing two measurement \
         conditions. Slice with --cpu-budget, or compare the budget-invariant \
         worst_iterations_per_cpu_sec metric."
    };
    let mut out = String::new();
    // Writing into a String cannot fail, so the results are discarded.
    let _ = writeln!(out, "{headline}");

    let mut describe_side = |label: &str, over: &BTreeSet<(u32, u32)>, mixed: &BTreeSet<u32>| {
        if !over.is_empty() {
            let pairs: Vec<String> = over
                .iter()
                .map(|(budget, vcpus)| format!("{budget}/{vcpus}"))
                .collect();
            let list = pairs.join(", ");
            let _ = writeln!(
                out,
                " side {label}: host-overcommitted run(s) [budget/vcpus]: {list}"
            );
        }
        if !mixed.is_empty() {
            let budgets: Vec<String> = mixed.iter().map(|budget| budget.to_string()).collect();
            let list = budgets.join(", ");
            let _ = writeln!(
                out,
                " side {label}: CPU budgets [{list}] share a pairing group — \
                 --average folds them into one mean (--no-average rejects them as \
                 duplicate keys); slice with --cpu-budget so cross-budget runs are \
                 not compared under one key"
            );
        }
    };
    describe_side("A", &over_a, &mixed_a);
    describe_side("B", &over_b, &mixed_b);
    Some(out)
}
pub(crate) fn render_dirty_warning(
rows_a: &[GauntletRow],
rows_b: &[GauntletRow],
) -> Option<String> {
use std::collections::BTreeSet;
use std::fmt::Write;
let mut dirty_kernel: BTreeSet<&str> = BTreeSet::new();
let mut dirty_project: BTreeSet<&str> = BTreeSet::new();
for row in rows_a.iter().chain(rows_b.iter()) {
if let Some(c) = row.kernel_commit.as_deref()
&& c.ends_with("-dirty")
{
dirty_kernel.insert(c);
}
if let Some(c) = row.commit.as_deref()
&& c.ends_with("-dirty")
{
dirty_project.insert(c);
}
}
if dirty_kernel.is_empty() && dirty_project.is_empty() {
return None;
}
let mut out = String::new();
writeln!(out, "warning: comparison includes dirty builds:").unwrap();
for v in &dirty_kernel {
writeln!(
out,
" - kernel source: {v} (working tree may have changed since this run)"
)
.unwrap();
}
for v in &dirty_project {
writeln!(
out,
" - project: {v} (working tree may have changed since this run)"
)
.unwrap();
}
writeln!(
out,
" Dirty runs overwrite previous results with the same HEAD."
)
.unwrap();
writeln!(out, " Commit changes for reproducible-ish comparisons.").unwrap();
Some(out)
}
/// Builds the error text shown when one side's filter matched no rows.
///
/// The base message is followed by zero or more `hint:` lines:
/// * a requested project/kernel commit exists in `rows` only with a
///   `-dirty` suffix;
/// * a requested `--run-source` value is absent from `rows`;
/// * a requested `--cpu-budget` value is absent from `rows`;
/// * a generic pointer to `stats list-values` whenever a commit filter
///   was used.
///
/// `side` is the side label, `rows` the full unfiltered pool and
/// `pool_len` the number of pooled sidecars reported in the message.
pub(crate) fn zero_match_diagnostic(
    side: &str,
    filter: &RowFilter,
    rows: &[GauntletRow],
    pool_len: usize,
) -> String {
    // Renders items as "`a`, `b`, `c`".
    fn backtick_list<T: std::fmt::Display>(items: impl IntoIterator<Item = T>) -> String {
        items
            .into_iter()
            .map(|item| format!("`{item}`"))
            .collect::<Vec<_>>()
            .join(", ")
    }

    let mut msg = format!(
        "stats compare: {side} side filter matched 0 sidecars in \
         pool ({pool_len} pooled). Check the per-side filters or \
         confirm the runs exist with `cargo ktstr stats list`."
    );

    // Clean commit requested, but only its `-dirty` twin was recorded.
    for want in &filter.project_commits {
        let dirty = format!("{want}-dirty");
        if rows
            .iter()
            .any(|row| row.commit.as_deref() == Some(dirty.as_str()))
        {
            msg.push_str("\nhint: ");
            msg.push_str(&format!(
                "no rows match `--project-commit {want}` but `{dirty}` exists in the pool — \
                 did you mean `--project-commit {dirty}`?"
            ));
        }
    }
    for want in &filter.kernel_commits {
        let dirty = format!("{want}-dirty");
        if rows
            .iter()
            .any(|row| row.kernel_commit.as_deref() == Some(dirty.as_str()))
        {
            msg.push_str("\nhint: ");
            msg.push_str(&format!(
                "no rows match `--kernel-commit {want}` but `{dirty}` exists in the pool — \
                 did you mean `--kernel-commit {dirty}`?"
            ));
        }
    }

    if !filter.run_sources.is_empty() {
        // A BTreeSet iterates in sorted order, so the listing is sorted.
        let pool_run_sources: std::collections::BTreeSet<&str> = rows
            .iter()
            .filter_map(|row| row.run_source.as_deref())
            .collect();
        let unknowns: Vec<&str> = filter
            .run_sources
            .iter()
            .map(String::as_str)
            .filter(|want| !pool_run_sources.contains(*want))
            .collect();
        if !unknowns.is_empty() {
            let unknown_list = backtick_list(unknowns.iter());
            let present_list = if pool_run_sources.is_empty() {
                "(none — every row has `run_source: null`)".to_string()
            } else {
                backtick_list(pool_run_sources.iter())
            };
            msg.push_str(&format!(
                "\nhint: --run-source {unknown_list} not found in pool; \
                 distinct values present: {present_list}. Values are \
                 case-sensitive (`ci` ≠ `CI`)."
            ));
        }
    }

    if !filter.cpu_budgets.is_empty() {
        let pool_budgets: std::collections::BTreeSet<u32> =
            rows.iter().filter_map(|row| row.cpu_budget).collect();
        // Requested budgets are strings, so compare in string form.
        let present_strs: std::collections::BTreeSet<String> =
            pool_budgets.iter().map(|budget| budget.to_string()).collect();
        let unknowns: Vec<&str> = filter
            .cpu_budgets
            .iter()
            .map(String::as_str)
            .filter(|want| !present_strs.contains(*want))
            .collect();
        if !unknowns.is_empty() {
            let unknown_list = backtick_list(unknowns.iter());
            let present_list = if pool_budgets.is_empty() {
                "(none — every row is a skip with no recorded budget)".to_string()
            } else {
                backtick_list(pool_budgets.iter())
            };
            msg.push_str(&format!(
                "\nhint: --cpu-budget {unknown_list} not found in pool; \
                 distinct budgets present: {present_list}."
            ));
        }
    }

    let touched_commit_dim =
        !(filter.project_commits.is_empty() && filter.kernel_commits.is_empty());
    if touched_commit_dim {
        msg.push_str(
            "\nhint: run `cargo ktstr stats list-values` to see every \
             distinct commit value present in the pool — the specific \
             value the filter expected may not have a sidecar yet, or \
             may differ from what was recorded by \
             `detect_project_commit` / `detect_kernel_commit`.",
        );
    }
    msg
}
/// Everything `prepare_partitioned_comparison` gathers before the actual
/// comparison and rendering take place.
struct PartitionedComparison {
/// Dimensions on which the A and B filters differ.
slicing_dims: Vec<Dimension>,
/// Dimensions used to pair an A-side row with a B-side row.
pairing_dims: Vec<Dimension>,
/// Every sidecar collected from the runs root (or the `--dir` override).
pool: Vec<crate::test_support::SidecarResult>,
/// One row per entry of `pool`, in the same order.
rows: Vec<GauntletRow>,
/// A-side rows handed to the comparison: averaged rows by default, the
/// filtered rows themselves under `--no-average`.
rows_a_for_compare: Vec<GauntletRow>,
/// B-side counterpart of `rows_a_for_compare`.
rows_b_for_compare: Vec<GauntletRow>,
/// A-side averaged groups; `None` under `--no-average`.
avg_a: Option<Vec<AveragedGroup>>,
/// B-side averaged groups; `None` under `--no-average`.
avg_b: Option<Vec<AveragedGroup>>,
/// Number of A-side rows matched by the filter, before any averaging.
pre_agg_a: usize,
/// Number of B-side rows matched by the filter, before any averaging.
pre_agg_b: usize,
}
/// Loads the sidecar pool, splits it into the A and B sides and prepares the
/// rows that will be compared.
///
/// Steps, in order: derive slicing and pairing dimensions; collect the pool
/// from `dir` (or the default runs root); convert sidecars to rows; apply
/// both filters; emit dirty-build and CPU-budget warnings on stderr; then
/// either average rows per pairing key or, with `no_average`, require that
/// pairing keys are unique on each side.
///
/// # Errors
/// Fails when the filters do not differ on any dimension, when the pool is
/// empty, when either side matches no rows, or — with `no_average` — when a
/// side contains duplicate pairing keys.
fn prepare_partitioned_comparison(
    filter_a: &RowFilter,
    filter_b: &RowFilter,
    dir: Option<&std::path::Path>,
    no_average: bool,
) -> anyhow::Result<PartitionedComparison> {
    let slicing_dims = derive_slicing_dims(filter_a, filter_b);
    match slicing_dims.len() {
        0 => anyhow::bail!(
            "stats compare: A and B select identical rows. \
             Specify at least one per-side filter (e.g. \
             --a-kernel 6.14 --b-kernel 6.15) to define what \
             dimension separates the two sides."
        ),
        1 => {}
        n => {
            let dim_names: Vec<&str> = slicing_dims.iter().map(|d| d.name()).collect();
            eprintln!(
                "warning: stats compare: slicing on {n} dimensions [{dims}]; \
                 results compress multiple axes into a single A/B contrast.",
                dims = dim_names.join(", "),
            );
        }
    }
    let pairing_dims = Dimension::pairing_dims(&slicing_dims);

    // An explicit directory is treated as an archive and gets the
    // archive-source override applied to the collected pool.
    let override_archive = dir.is_some();
    let root = dir.map_or_else(crate::test_support::runs_root, |d| d.to_path_buf());
    let mut pool = crate::test_support::collect_pool(&root);
    if override_archive {
        crate::test_support::apply_archive_source_override(&mut pool);
    }
    if pool.is_empty() {
        anyhow::bail!(
            "stats compare: no sidecar data found under {}. \
             Run `cargo ktstr test` to generate runs, or pass \
             --dir to point at an archived sidecar tree.",
            root.display(),
        );
    }

    let rows: Vec<GauntletRow> = pool.iter().map(sidecar_to_row).collect();
    let rows_a = apply_row_filters(&rows, filter_a);
    let rows_b = apply_row_filters(&rows, filter_b);
    if rows_a.is_empty() {
        anyhow::bail!(
            "{}",
            zero_match_diagnostic("A", filter_a, &rows, pool.len()),
        );
    }
    if rows_b.is_empty() {
        anyhow::bail!(
            "{}",
            zero_match_diagnostic("B", filter_b, &rows, pool.len()),
        );
    }

    warn_on_dirty_builds(&rows_a, &rows_b);
    warn_on_overcommit(&rows_a, &rows_b, &pairing_dims);

    // Row counts before averaging; reported in the output header.
    let pre_agg_a = rows_a.len();
    let pre_agg_b = rows_b.len();

    let (rows_a_for_compare, rows_b_for_compare, avg_a, avg_b) = if no_average {
        check_no_duplicate_pairing_keys(&rows_a, &pairing_dims, "A")?;
        check_no_duplicate_pairing_keys(&rows_b, &pairing_dims, "B")?;
        (rows_a, rows_b, None, None)
    } else {
        let groups_a = group_and_average_by(&rows_a, &pairing_dims);
        let groups_b = group_and_average_by(&rows_b, &pairing_dims);
        let averaged_a: Vec<GauntletRow> = groups_a.iter().map(|g| g.row.clone()).collect();
        let averaged_b: Vec<GauntletRow> = groups_b.iter().map(|g| g.row.clone()).collect();
        (averaged_a, averaged_b, Some(groups_a), Some(groups_b))
    };

    Ok(PartitionedComparison {
        slicing_dims,
        pairing_dims,
        pool,
        rows,
        rows_a_for_compare,
        rows_b_for_compare,
        avg_a,
        avg_b,
        pre_agg_a,
        pre_agg_b,
    })
}
/// Runs a full A-vs-B comparison and prints the result to stdout.
///
/// Output order: slicing/pairing header, averaging header (unless
/// `no_average`), scalar findings table, phase block, summary, host-context
/// delta. With `phase_opts.phases_only` only the headers and the phase block
/// are printed.
///
/// Returns `Ok(1)` when at least one scalar regression was found and `Ok(0)`
/// otherwise.
///
/// # Errors
/// Propagates every failure of `prepare_partitioned_comparison`.
pub fn compare_partitions(
    filter_a: &RowFilter,
    filter_b: &RowFilter,
    filter: Option<&str>,
    policy: &ComparisonPolicy,
    dir: Option<&std::path::Path>,
    no_average: bool,
    phase_opts: &PhaseDisplayOptions,
) -> anyhow::Result<i32> {
    let prepared = prepare_partitioned_comparison(filter_a, filter_b, dir, no_average)?;
    let report = compare_rows_by(
        &prepared.rows_a_for_compare,
        &prepared.rows_b_for_compare,
        &prepared.pairing_dims,
        filter,
        policy,
    );

    let label_a = render_side_label(filter_a, &prepared.slicing_dims, "A");
    let label_b = render_side_label(filter_b, &prepared.slicing_dims, "B");

    let slice_names: Vec<&str> = prepared.slicing_dims.iter().map(|d| d.name()).collect();
    let pair_names: Vec<&str> = prepared.pairing_dims.iter().map(|d| d.name()).collect();
    println!("slicing dimensions: {}", slice_names.join(", "));
    let separator = if pair_names.is_empty() { "" } else { ", " };
    println!(
        "pairing on: scenario{}{}",
        separator,
        pair_names.join(", "),
    );
    if !no_average {
        println!(
            "{}",
            format_average_header(prepared.pre_agg_a, prepared.pre_agg_b, &label_a, &label_b)
        );
    }

    let show_scalars = !phase_opts.phases_only;
    if show_scalars {
        print_scalar_findings_table(&report, &label_a, &label_b);
    }
    print_phase_block(&report, phase_opts, &label_a, &label_b);
    if show_scalars {
        print_summary_block(
            &report,
            &prepared.avg_a,
            &prepared.avg_b,
            &label_a,
            &label_b,
        );
        print_host_context_delta(
            &prepared.pool,
            &prepared.rows,
            filter_a,
            filter_b,
            &label_a,
            &label_b,
        );
    }

    // Exit status: non-zero only when a scalar regression was found.
    Ok(i32::from(report.regressions > 0))
}
/// Prints one table row per scalar finding, with the two side labels as
/// column headers and a coloured verdict column.
fn print_scalar_findings_table(report: &CompareReport, label_a: &str, label_b: &str) {
    use comfy_table::{Cell, Color};

    let mut table = crate::cli::new_table();
    table.set_header(vec!["TEST", "METRIC", label_a, label_b, "DELTA", "VERDICT"]);
    for finding in &report.findings {
        let verdict = if finding.is_regression {
            Cell::new("REGRESSION").fg(Color::Red)
        } else {
            Cell::new("improvement").fg(Color::Green)
        };
        let test_name = finding.pairing_key.0.join("/");
        let value_a = format!("{:.2}", finding.val_a);
        let value_b = format!("{:.2}", finding.val_b);
        let change = format!("{:+.2}{}", finding.delta, finding.metric.display_unit);
        table.add_row(vec![
            Cell::new(test_name),
            Cell::new(finding.metric.name),
            Cell::new(value_a),
            Cell::new(value_b),
            Cell::new(change),
            verdict,
        ]);
    }
    println!("{table}");
}
/// Prints the per-phase section: a delta table for phases present on both
/// sides, an asymmetry table for one-sided phases, and — when no phase
/// display flag is set — a footer summarising what was shown.
///
/// Nothing is printed when `phase_opts.no_phases` is set, when the report
/// has no phase data, or when the display filters remove every row.
///
/// Verdict column: a row flagged `is_regression` is a "REGRESSION". A row
/// that is not flagged but whose delta is zero or moves in the metric's
/// "worse" direction stayed below the significance gates and is labelled
/// "unchanged" — it is not an improvement. Every remaining row is labelled
/// "improvement"; the row does not record whether such a change cleared the
/// gates, so sub-threshold improvements cannot be told apart here.
fn print_phase_block(
    report: &CompareReport,
    phase_opts: &PhaseDisplayOptions,
    label_a: &str,
    label_b: &str,
) {
    use comfy_table::{Cell, Color};

    let has_phase_data = !report.phase_deltas.is_empty() || !report.unpaired_phases.is_empty();
    if phase_opts.no_phases || !has_phase_data {
        return;
    }

    let filtered_deltas: Vec<&PhaseDeltaRow> = report
        .phase_deltas
        .iter()
        .filter(|d| phase_opts.matches_phase(d.step_index))
        .filter(|d| phase_opts.passes_delta_threshold(d))
        .collect();
    let filtered_unpaired: Vec<&UnpairedPhaseRow> = report
        .unpaired_phases
        .iter()
        .filter(|u| phase_opts.matches_phase(u.step_index))
        .collect();
    if filtered_deltas.is_empty() && filtered_unpaired.is_empty() {
        return;
    }

    // Captured before the vectors are consumed by sorting below.
    let filtered_delta_total = filtered_deltas.len();
    let filtered_delta_regressions = filtered_deltas.iter().filter(|d| d.is_regression).count();

    println!();
    println!("phase coverage:");

    if !filtered_deltas.is_empty() {
        let mut phase_table = crate::cli::new_table();
        phase_table.set_header(vec![
            "PHASE", "TEST", "METRIC", label_a, label_b, "DELTA", "VERDICT",
        ]);
        let mut sorted_deltas = filtered_deltas;
        sorted_deltas.sort_by(|a, b| {
            a.step_index
                .cmp(&b.step_index)
                .then_with(|| a.pairing_key.0.cmp(&b.pairing_key.0))
                .then_with(|| a.metric.name.cmp(b.metric.name))
        });
        for d in sorted_deltas {
            // Direction of the change, independent of its significance.
            let worsened = if d.metric.higher_is_worse() {
                d.delta > 0.0
            } else {
                d.delta < 0.0
            };
            let verdict_cell = if d.is_regression {
                Cell::new("REGRESSION").fg(Color::Red)
            } else if worsened || d.delta == 0.0 {
                // Not flagged although it did not get better: the change
                // was below the gates, so it must not be shown as a win.
                Cell::new("unchanged")
            } else {
                Cell::new("improvement").fg(Color::Green)
            };
            let test_label = d.pairing_key.0.join("/");
            let phase_cell = format!("{}: {}", d.step_index, d.label);
            phase_table.add_row(vec![
                Cell::new(phase_cell),
                Cell::new(test_label),
                Cell::new(d.metric.name),
                Cell::new(format!("{:.2}", d.a)),
                Cell::new(format!("{:.2}", d.b)),
                Cell::new(format!("{:+.2}{}", d.delta, d.metric.display_unit)),
                verdict_cell,
            ]);
        }
        println!("{phase_table}");
    }

    if !filtered_unpaired.is_empty() {
        println!();
        println!("phase coverage asymmetry (one-sided phases):");
        let mut unpaired_table = crate::cli::new_table();
        unpaired_table.set_header(vec!["SIDE", "TEST", "PHASE", "METRIC", "VALUE"]);
        let mut sorted_unpaired = filtered_unpaired;
        sorted_unpaired.sort_by(|a, b| {
            a.step_index
                .cmp(&b.step_index)
                .then_with(|| a.side.as_str().cmp(b.side.as_str()))
                .then_with(|| a.pairing_key.0.cmp(&b.pairing_key.0))
        });
        for u in sorted_unpaired {
            let test_label = u.pairing_key.0.join("/");
            let phase_cell = format!("{}: {}", u.step_index, u.label);
            if u.metrics.is_empty() {
                // Keep the phase visible even when it has no metrics.
                unpaired_table.add_row(vec![
                    Cell::new(u.side.as_str()),
                    Cell::new(test_label),
                    Cell::new(phase_cell),
                    Cell::new("—"),
                    Cell::new("—"),
                ]);
            } else {
                for (metric_name, &value) in &u.metrics {
                    unpaired_table.add_row(vec![
                        Cell::new(u.side.as_str()),
                        Cell::new(&test_label),
                        Cell::new(&phase_cell),
                        Cell::new(metric_name),
                        Cell::new(format!("{value:.2}")),
                    ]);
                }
            }
        }
        println!("{unpaired_table}");
    }

    // The footer advertises the filter flags, so it is shown only while
    // none of them is in use.
    let any_flag_set = phase_opts.phases_only
        || phase_opts.steps_only
        || phase_opts.phase.is_some()
        || phase_opts.phase_threshold.is_some();
    if !any_flag_set {
        println!(
            " phases: {filtered_delta_total} delta row(s) shown \
             ({filtered_delta_regressions} regression{plural}). \
             Filter with --phase N / --phases-only / --steps-only / \
             --phase-threshold P / --no-phases.",
            plural = if filtered_delta_regressions == 1 {
                ""
            } else {
                "s"
            },
        );
    }
}
/// Prints the summary section: the regression/improvement/unchanged totals,
/// then — each only when applicable — the excluded-pair count, the per-group
/// pass counts (available only when both sides were averaged), and the
/// counts of rows present on one side only.
fn print_summary_block(
    report: &CompareReport,
    avg_a: &Option<Vec<AveragedGroup>>,
    avg_b: &Option<Vec<AveragedGroup>>,
    label_a: &str,
    label_b: &str,
) {
    println!();
    println!(
        "summary: {} regressions, {} improvements, {} unchanged",
        report.regressions, report.improvements, report.unchanged,
    );

    let excluded = report.excluded_pairs;
    if excluded > 0 {
        println!(
            " {} pairing-key row pair(s) excluded from regression math because one \
             or both sides did not pass (failed, inconclusive, or skipped)",
            excluded,
        );
    }

    if let (Some(groups_a), Some(groups_b)) = (avg_a.as_ref(), avg_b.as_ref()) {
        let pass_counts = format_per_group_pass_counts(groups_a, groups_b, label_a, label_b);
        if !pass_counts.is_empty() {
            print!("{pass_counts}");
        }
    }

    let added = report.new_in_b;
    if added > 0 {
        println!(
            " {} row(s) new in '{}' (no matching key in '{}')",
            added, label_b, label_a,
        );
    }
    let removed = report.removed_from_a;
    if removed > 0 {
        println!(
            " {} row(s) removed from '{}' (no matching key in '{}')",
            removed, label_a, label_b,
        );
    }
}
/// Prints the host-context comparison between the two sides.
///
/// `pool` and `rows` are walked in lockstep. For each side, the host context
/// of the first sidecar whose row matches that side's filter and that
/// recorded a host is used; the two are then rendered by `format_host_delta`.
fn print_host_context_delta(
    pool: &[crate::test_support::SidecarResult],
    rows: &[GauntletRow],
    filter_a: &RowFilter,
    filter_b: &RowFilter,
    label_a: &str,
    label_b: &str,
) {
    let host_a = pool
        .iter()
        .zip(rows.iter())
        .filter(|(_, r)| filter_a.matches(r))
        .find_map(|(sidecar, _)| sidecar.host.as_ref());
    let host_b = pool
        .iter()
        .zip(rows.iter())
        .filter(|(_, r)| filter_b.matches(r))
        .find_map(|(sidecar, _)| sidecar.host.as_ref());

    let rendered = format_host_delta(host_a, host_b, label_a, label_b);
    print!("{}", rendered);
}
/// Verifies that no two rows of one side share a pairing key, as required
/// when averaging is disabled.
///
/// # Errors
/// Fails on the first duplicated key (in key order), naming the side, the
/// number of colliding sidecars and the key.
pub(crate) fn check_no_duplicate_pairing_keys(
    rows: &[GauntletRow],
    pairing_dims: &[Dimension],
    side_label: &str,
) -> anyhow::Result<()> {
    let mut occurrences: BTreeMap<PairingKey, usize> = BTreeMap::new();
    for row in rows {
        *occurrences
            .entry(PairingKey::from_row(row, pairing_dims))
            .or_default() += 1;
    }

    match occurrences.iter().find(|(_, seen)| **seen > 1) {
        None => Ok(()),
        Some((dup_key, count)) => anyhow::bail!(
            "stats compare --no-average: side {side_label} has {count} \
             sidecars with the same pairing key {key:?}. Either drop \
             --no-average to average them, or add another --{side}-X \
             filter to disambiguate.",
            key = dup_key.0,
            side = side_label.to_lowercase(),
        ),
    }
}
/// Formats the one-line header stating how many runs were averaged on each
/// side; `a` and `b` are the side labels.
pub(crate) fn format_average_header(
    pre_agg_a: usize,
    pre_agg_b: usize,
    a: &str,
    b: &str,
) -> String {
    let side_a = format!("{pre_agg_a} runs ({a})");
    let side_b = format!("{pre_agg_b} runs ({b})");
    format!("averaged across {side_a} and {side_b}")
}
/// Formats the "per-group pass counts" block: one line per
/// (scenario, topology, work type) group, showing `passes/total` for each
/// side plus a skip/inconclusive/fail breakdown when any is non-zero.
///
/// A side with no group for a key is shown as `-`. Returns an empty string
/// when neither side has any group.
///
/// NOTE(review): groups are keyed only by scenario, topology and work type.
/// If two groups on the same side differ on another dimension, the later one
/// replaces the earlier one here — confirm that cannot happen for the
/// pairing dimensions in use.
pub(crate) fn format_per_group_pass_counts(
    avg_a: &[AveragedGroup],
    avg_b: &[AveragedGroup],
    a: &str,
    b: &str,
) -> String {
    type SummaryKey<'a> = (&'a str, &'a str, &'a str);
    type SummaryValue<'a> = (Option<&'a AveragedGroup>, Option<&'a AveragedGroup>);

    // Renders one side's cell: "-" or "passes/total (breakdown)".
    fn side_cell(group: Option<&AveragedGroup>) -> String {
        let Some(g) = group else {
            return "-".to_string();
        };
        let mut cell = format!("{}/{}", g.passes_observed, g.total_observed);
        let mut breakdown: Vec<String> = Vec::with_capacity(3);
        if g.skips_observed > 0 {
            breakdown.push(format!("{} skip", g.skips_observed));
        }
        if g.inconclusives_observed > 0 {
            breakdown.push(format!("{} inc", g.inconclusives_observed));
        }
        if g.failures_observed > 0 {
            breakdown.push(format!("{} fail", g.failures_observed));
        }
        if !breakdown.is_empty() {
            cell.push_str(&format!(" ({})", breakdown.join(", ")));
        }
        cell
    }

    fn key_of(group: &AveragedGroup) -> SummaryKey<'_> {
        (
            group.row.scenario.as_str(),
            group.row.topology.as_str(),
            group.row.work_type.as_str(),
        )
    }

    // BTreeMap keeps the output sorted by (scenario, topology, work type).
    let mut groups: BTreeMap<SummaryKey<'_>, SummaryValue<'_>> = BTreeMap::new();
    for group in avg_a {
        groups.entry(key_of(group)).or_default().0 = Some(group);
    }
    for group in avg_b {
        groups.entry(key_of(group)).or_default().1 = Some(group);
    }
    if groups.is_empty() {
        return String::new();
    }

    let mut out = String::from("\n");
    out.push_str(
        "per-group pass counts (passes/total + skip/inconc/fail breakdown when non-zero):\n",
    );
    for ((scn, topo, wt), (on_a, on_b)) in groups {
        out.push_str(&format!(
            " {scn}/{topo}/{wt}: {a}={pa} {b}={pb}\n",
            pa = side_cell(on_a),
            pb = side_cell(on_b),
        ));
    }
    out
}
/// Formats the host-context section comparing the two sides; `a` and `b`
/// are the side labels.
///
/// * both hosts captured, no difference — an "identical" line, naming the
///   architecture when both sides report the same one;
/// * both captured, different — a "host delta" header followed by the diff;
/// * only one captured — a note that the delta is unavailable;
/// * neither captured — an empty string.
pub(crate) fn format_host_delta(
    host_a: Option<&crate::host_context::HostContext>,
    host_b: Option<&crate::host_context::HostContext>,
    a: &str,
    b: &str,
) -> String {
    let (ha, hb) = match (host_a, host_b) {
        (None, None) => return String::new(),
        (Some(_), None) => {
            return format!("\nhost: captured in '{a}' only, delta unavailable\n");
        }
        (None, Some(_)) => {
            return format!("\nhost: captured in '{b}' only, delta unavailable\n");
        }
        (Some(ha), Some(hb)) => (ha, hb),
    };

    let delta = ha.diff(hb);
    if !delta.is_empty() {
        return format!("\nhost delta ('{a}' → '{b}'):\n{delta}");
    }

    match (ha.arch.as_deref(), hb.arch.as_deref()) {
        (Some(arch_a), Some(arch_b)) if arch_a == arch_b => {
            format!("\nhost: identical between '{a}' and '{b}' (arch: {arch_a})\n",)
        }
        _ => format!("\nhost: identical between '{a}' and '{b}'\n"),
    }
}