use crate::analysis::regression::RegressionDetector;
use crate::metrics::catalog::FullProfile;
use crate::metrics::export;
use anyhow::Result;
use serde::Serialize;
use std::path::Path;
/// One row of a profile comparison: a single metric's value in the baseline
/// and in the current profile, plus the relative change and a verdict.
///
/// Instances are produced by `diff_profiles` and are serialized as-is when
/// JSON output is requested.
#[derive(Debug, Serialize)]
pub struct MetricDiff {
/// Metric identifier, e.g. `"wall_clock_time_us"` or `"tflops"`.
pub name: String,
/// Value of the metric in the baseline profile.
pub baseline: f64,
/// Value of the metric in the current profile.
pub current: f64,
/// Change from `baseline` to `current`, in percent of `baseline`
/// (stored as `0.0` when `baseline` is zero).
pub change_pct: f64,
/// One of `"="`, `"IMPROVED"` or `"REGRESSED"`.
pub verdict: &'static str,
}
/// Compares two profiles metric by metric.
///
/// Timing and throughput metrics are always considered. The GPU compute and
/// GPU memory metrics are only compared when the corresponding section is
/// present in *both* profiles. Wall-clock time is the only metric for which a
/// lower value counts as an improvement.
///
/// Returns the diffs in a fixed order (timing, throughput, GPU compute, GPU
/// memory). Metrics that are zero in both profiles are left out by `add_diff`.
pub fn diff_profiles(baseline: &FullProfile, current: &FullProfile) -> Vec<MetricDiff> {
    // Each row is (metric name, baseline value, current value, lower_is_better).
    let mut rows: Vec<(&str, f64, f64, bool)> = vec![
        (
            "wall_clock_time_us",
            baseline.timing.wall_clock_time_us,
            current.timing.wall_clock_time_us,
            true,
        ),
        (
            "tflops",
            baseline.throughput.tflops,
            current.throughput.tflops,
            false,
        ),
        (
            "bandwidth_gbps",
            baseline.throughput.bandwidth_gbps,
            current.throughput.bandwidth_gbps,
            false,
        ),
    ];

    if let (Some(base_gpu), Some(cur_gpu)) = (&baseline.gpu_compute, &current.gpu_compute) {
        rows.extend([
            (
                "sm_utilization_pct",
                base_gpu.sm_utilization_pct,
                cur_gpu.sm_utilization_pct,
                false,
            ),
            (
                "achieved_occupancy_pct",
                base_gpu.achieved_occupancy_pct,
                cur_gpu.achieved_occupancy_pct,
                false,
            ),
            (
                "warp_exec_efficiency_pct",
                base_gpu.warp_execution_efficiency_pct,
                cur_gpu.warp_execution_efficiency_pct,
                false,
            ),
        ]);
    }

    if let (Some(base_mem), Some(cur_mem)) = (&baseline.gpu_memory, &current.gpu_memory) {
        rows.extend([
            (
                "l2_hit_rate_pct",
                base_mem.l2_hit_rate_pct,
                cur_mem.l2_hit_rate_pct,
                false,
            ),
            (
                "global_load_efficiency_pct",
                base_mem.global_load_efficiency_pct,
                cur_mem.global_load_efficiency_pct,
                false,
            ),
        ]);
    }

    let mut diffs = Vec::with_capacity(rows.len());
    for (name, before, after, lower_better) in rows {
        add_diff(&mut diffs, name, before, after, lower_better);
    }
    diffs
}
/// Appends the comparison of a single metric to `diffs`.
///
/// * `name` - metric identifier stored in the resulting [`MetricDiff`].
/// * `baseline` / `current` - the metric's value in each profile.
/// * `lower_better` - `true` when a smaller value is the better one
///   (e.g. elapsed time), `false` when a larger value is better.
///
/// Nothing is appended when both values are zero. Otherwise the verdict is
/// `"="` for a relative change below 2 %, and `"IMPROVED"` / `"REGRESSED"`
/// depending on the direction of the change.
///
/// A relative change cannot be computed against a zero baseline. In that case
/// `change_pct` is stored as `0.0` (so serialized output stays finite), but the
/// verdict is still derived from the direction of the change instead of being
/// reported as `"="`.
fn add_diff(
    diffs: &mut Vec<MetricDiff>,
    name: &str,
    baseline: f64,
    current: f64,
    lower_better: bool,
) {
    // A metric that is zero on both sides carries no information.
    if baseline == 0.0 && current == 0.0 {
        return;
    }

    let zero_baseline = baseline == 0.0;
    let change_pct = if zero_baseline {
        0.0
    } else {
        (current - baseline) / baseline * 100.0
    };

    // The 2 % noise threshold only makes sense when a percentage exists; a
    // metric that appears out of a zero baseline is always a real change.
    let within_noise = !zero_baseline && change_pct.abs() < 2.0;
    let got_better = if lower_better {
        current < baseline
    } else {
        current > baseline
    };

    let verdict = if within_noise {
        "="
    } else if got_better {
        "IMPROVED"
    } else {
        "REGRESSED"
    };

    diffs.push(MetricDiff {
        name: name.to_string(),
        baseline,
        current,
        change_pct,
        verdict,
    });
}
/// Prints the diff as an aligned text table on stdout, followed by a one-line
/// summary of how many metrics regressed or improved.
///
/// `baseline_name` and `current_name` are shown verbatim in the header. The
/// summary line is coloured with ANSI escapes: red when there is at least one
/// regression, green when there are only improvements.
pub fn render_diff(diffs: &[MetricDiff], baseline_name: &str, current_name: &str) {
    println!("\n=== CGP Profile Diff ===\n");
    println!(" Baseline: {baseline_name}");
    println!(" Current: {current_name}\n");
    println!(
        " {:30} {:>14} {:>14} {:>10} {:>10}",
        "Metric", "Baseline", "Current", "Change", "Verdict"
    );
    println!(" {}", "-".repeat(82));

    // Tally verdicts while printing so the slice is walked only once.
    let mut regressions = 0usize;
    let mut improvements = 0usize;
    for row in diffs {
        let signed_pct = format!("{:+.1}%", row.change_pct);
        println!(
            " {:30} {:>14.2} {:>14.2} {:>10} {:>10}",
            row.name, row.baseline, row.current, signed_pct, row.verdict
        );
        match row.verdict {
            "REGRESSED" => regressions += 1,
            "IMPROVED" => improvements += 1,
            _ => {}
        }
    }

    println!();
    match (regressions, improvements) {
        (0, 0) => println!(" No significant changes."),
        (0, _) => println!(" \x1b[32m{improvements} improvement(s)\x1b[0m, no regressions"),
        _ => println!(" \x1b[31m{regressions} regression(s)\x1b[0m, {improvements} improvement(s)"),
    }
    println!();
}
/// Entry point of the diff command: loads two profile JSON files, compares
/// them and prints the result.
///
/// * `baseline` / `current` - paths of the profile files; both are required.
/// * `_before` / `_after` - accepted but currently ignored (the usage message
///   describes the commit-based mode as not yet implemented).
/// * `json` - when `true`, only the metric diffs are printed, as
///   pretty-printed JSON; the table, the statistical summary and the timing
///   line are skipped.
///
/// # Errors
///
/// Returns an error when `baseline` or `current` is missing, when either
/// profile fails to load, or when the diffs cannot be serialized to JSON.
pub fn run_diff(
    baseline: Option<&str>,
    current: Option<&str>,
    _before: Option<&str>,
    _after: Option<&str>,
    json: bool,
) -> Result<()> {
    let (baseline_path, current_path) = resolve_diff_paths(baseline, current)?;

    // The reported duration covers loading, diffing and rendering.
    let started = std::time::Instant::now();
    let baseline_profile = export::load_json(Path::new(baseline_path))?;
    let current_profile = export::load_json(Path::new(current_path))?;
    let diffs = diff_profiles(&baseline_profile, &current_profile);

    if json {
        println!("{}", serde_json::to_string_pretty(&diffs)?);
        return Ok(());
    }

    render_diff(&diffs, baseline_path, current_path);
    print_statistical_summary(&baseline_profile, &current_profile);

    let elapsed_ms = started.elapsed().as_secs_f64() * 1000.0;
    println!(" Diff completed in {:.0}ms", elapsed_ms);
    println!();
    Ok(())
}
/// Returns the `(baseline, current)` path pair when both were supplied.
///
/// # Errors
///
/// Fails with the command's usage text when either path is missing.
fn resolve_diff_paths<'a>(
    baseline: Option<&'a str>,
    current: Option<&'a str>,
) -> Result<(&'a str, &'a str)> {
    // `zip` yields `Some` only when both options are `Some`.
    baseline.zip(current).ok_or_else(|| {
        anyhow::anyhow!(
            "Usage: cgp diff --baseline <file.json> --current <file.json>\n\
             Or: cgp diff --before <commit> --after <commit> (not yet implemented)"
        )
    })
}
/// Prints the "Statistical:" line for the wall-clock timing.
///
/// The regression detector is used only when both profiles report more than
/// one timing sample; otherwise a plain single-sample comparison is printed.
fn print_statistical_summary(baseline: &FullProfile, current: &FullProfile) {
    let lacks_samples = baseline.timing.samples <= 1 || current.timing.samples <= 1;
    if lacks_samples {
        print_single_sample_compare(baseline, current);
        return;
    }
    print_multi_sample_regression(baseline, current);
}
/// Runs the regression detector on the wall-clock timing of both profiles and
/// prints its verdict, percentage change and Cohen's d.
///
/// The detector is fed samples synthesized from each profile's mean wall-clock
/// time and standard deviation (see `synth_samples`), not recorded raw samples.
fn print_multi_sample_regression(baseline: &FullProfile, current: &FullProfile) {
    let detector = RegressionDetector::new();
    let synthesize = |profile: &FullProfile| {
        synth_samples(profile.timing.wall_clock_time_us, profile.timing.stddev_us)
    };

    let before = synthesize(baseline);
    let after = synthesize(current);
    let outcome = detector.compare(&before, &after);

    println!(
        " Statistical: {} (change {:.1}%, Cohen's d = {:.2})",
        outcome.verdict, outcome.change_pct, outcome.effect_size_cohens_d
    );
}
/// Builds 30 evenly spaced synthetic samples centred on `mean`.
///
/// The samples are spread over roughly `mean ± spread`, where `spread` is
/// `stddev` floored at 1 % of `mean` so that a zero standard deviation does
/// not collapse every sample onto the same value.
///
/// The offsets are taken around the midpoint of the index range (14.5), which
/// makes them symmetric: the average of the returned samples equals `mean`.
fn synth_samples(mean: f64, stddev: f64) -> Vec<f64> {
    const SAMPLE_COUNT: usize = 30;

    let spread = stddev.max(mean * 0.01);
    let midpoint = (SAMPLE_COUNT as f64 - 1.0) / 2.0;
    let half_width = SAMPLE_COUNT as f64 / 2.0;

    (0..SAMPLE_COUNT)
        .map(|i| mean + spread * ((i as f64 - midpoint) / half_width))
        .collect()
}
/// Prints a threshold-based verdict for the wall-clock time when a sample-based
/// comparison is not possible.
///
/// A change of more than 5 % in either direction is reported as `IMPROVED`
/// (faster) or `REGRESSED` (slower); anything else is `NO_CHANGE`. Nothing is
/// printed when either timing is zero or negative.
fn print_single_sample_compare(baseline: &FullProfile, current: &FullProfile) {
    let before_us = baseline.timing.wall_clock_time_us;
    let after_us = current.timing.wall_clock_time_us;

    // Without two positive timings there is nothing meaningful to compare.
    if [before_us, after_us].iter().any(|t| *t <= 0.0) {
        return;
    }

    let change = (after_us - before_us) / before_us * 100.0;
    let verdict = match change {
        pct if pct < -5.0 => "IMPROVED",
        pct if pct > 5.0 => "REGRESSED",
        _ => "NO_CHANGE",
    };
    println!(" Statistical: {verdict} (change {change:.1}%, single-sample)");
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::catalog::*;

    /// Builds a profile with the given wall-clock time and TFLOPS, 50 timing
    /// samples, and every other field left at its default.
    fn make_profile(time_us: f64, tflops: f64) -> FullProfile {
        FullProfile {
            version: "2.0".to_string(),
            timing: TimingMetrics {
                wall_clock_time_us: time_us,
                samples: 50,
                ..Default::default()
            },
            throughput: ThroughputMetrics {
                tflops,
                ..Default::default()
            },
            ..Default::default()
        }
    }

    /// Returns the diff entry for `name`, panicking when it is absent.
    fn diff_for<'a>(diffs: &'a [MetricDiff], name: &str) -> &'a MetricDiff {
        diffs
            .iter()
            .find(|d| d.name == name)
            .unwrap_or_else(|| panic!("no diff entry for {name}"))
    }

    /// Faster run with higher throughput: both metrics must be improvements.
    #[test]
    fn test_diff_improvement() {
        let baseline = make_profile(35.7, 7.5);
        let current = make_profile(23.2, 11.6);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diff_for(&diffs, "wall_clock_time_us");
        assert_eq!(time_diff.verdict, "IMPROVED");
        assert!(time_diff.change_pct < -30.0);

        let tflops_diff = diff_for(&diffs, "tflops");
        assert_eq!(tflops_diff.verdict, "IMPROVED");
    }

    /// Slower run: wall-clock time must be flagged as a regression.
    #[test]
    fn test_diff_regression() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(35.7, 7.5);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diff_for(&diffs, "wall_clock_time_us");
        assert_eq!(time_diff.verdict, "REGRESSED");
    }

    /// A change below the 2 % threshold is reported as "=".
    #[test]
    fn test_diff_no_change() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(23.4, 11.5);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diff_for(&diffs, "wall_clock_time_us");
        assert_eq!(time_diff.verdict, "=");
    }

    /// Coarse sanity bound: 100 diffs must finish within 100 ms.
    #[test]
    fn test_diff_speed() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(35.7, 7.5);

        let start = std::time::Instant::now();
        for _ in 0..100 {
            let _ = diff_profiles(&baseline, &current);
        }
        let elapsed = start.elapsed();

        assert!(
            elapsed.as_millis() < 100,
            "100 diffs took {}ms",
            elapsed.as_millis()
        );
    }
}