use crate::analysis::regression::RegressionDetector;
use crate::metrics::catalog::FullProfile;
use crate::metrics::export;
use anyhow::Result;
use serde::Serialize;
use std::path::Path;
/// One row of a profile comparison: a single metric as seen in the baseline
/// profile and in the current profile, plus the derived change and verdict.
#[derive(Debug, Serialize)]
pub struct MetricDiff {
/// Metric identifier, e.g. `"wall_clock_time_us"` or `"tflops"`.
pub name: String,
/// Value of the metric in the baseline profile.
pub baseline: f64,
/// Value of the metric in the current profile.
pub current: f64,
/// Relative change `(current - baseline) / baseline * 100`; `0.0` when the
/// baseline value is zero.
pub change_pct: f64,
/// One of `"="`, `"IMPROVED"`, or `"REGRESSED"`.
pub verdict: &'static str,
}
/// Compares the headline metrics of two profiles.
///
/// Returns one [`MetricDiff`] per compared metric, in a fixed order: timing,
/// throughput, GPU compute, GPU memory. Wall-clock time is treated as
/// lower-is-better; every other metric is higher-is-better.
///
/// The GPU compute and GPU memory groups are only compared when *both*
/// profiles carry that section. Metrics whose value is exactly `0.0` in both
/// profiles are omitted from the result (see `add_diff`).
pub fn diff_profiles(baseline: &FullProfile, current: &FullProfile) -> Vec<MetricDiff> {
    let mut diffs = Vec::new();

    // Timing: the only metric in this table where a smaller value is better.
    add_diff(
        &mut diffs,
        "wall_clock_time_us",
        baseline.timing.wall_clock_time_us,
        current.timing.wall_clock_time_us,
        true,
    );

    // Throughput: higher is better.
    add_diff(
        &mut diffs,
        "tflops",
        baseline.throughput.tflops,
        current.throughput.tflops,
        false,
    );
    add_diff(
        &mut diffs,
        "bandwidth_gbps",
        baseline.throughput.bandwidth_gbps,
        current.throughput.bandwidth_gbps,
        false,
    );

    // GPU compute counters, only when present on both sides.
    if let (Some(bg), Some(cg)) = (&baseline.gpu_compute, &current.gpu_compute) {
        add_diff(
            &mut diffs,
            "sm_utilization_pct",
            bg.sm_utilization_pct,
            cg.sm_utilization_pct,
            false,
        );
        add_diff(
            &mut diffs,
            "achieved_occupancy_pct",
            bg.achieved_occupancy_pct,
            cg.achieved_occupancy_pct,
            false,
        );
        add_diff(
            &mut diffs,
            "warp_exec_efficiency_pct",
            bg.warp_execution_efficiency_pct,
            cg.warp_execution_efficiency_pct,
            false,
        );
    }

    // GPU memory counters, only when present on both sides.
    if let (Some(bm), Some(cm)) = (&baseline.gpu_memory, &current.gpu_memory) {
        add_diff(
            &mut diffs,
            "l2_hit_rate_pct",
            bm.l2_hit_rate_pct,
            cm.l2_hit_rate_pct,
            false,
        );
        add_diff(
            &mut diffs,
            "global_load_efficiency_pct",
            bm.global_load_efficiency_pct,
            cm.global_load_efficiency_pct,
            false,
        );
    }

    diffs
}
/// Appends a [`MetricDiff`] for one metric to `diffs`.
///
/// * `name` - metric identifier stored in the row.
/// * `baseline` / `current` - the two values being compared.
/// * `lower_better` - `true` when a smaller value counts as an improvement.
///
/// Nothing is appended when both values are exactly `0.0`. When only the
/// baseline is zero the relative change is reported as `0.0`, which in turn
/// yields the `"="` verdict. Changes smaller than 2% in magnitude are also
/// reported as `"="`.
fn add_diff(
    diffs: &mut Vec<MetricDiff>,
    name: &str,
    baseline: f64,
    current: f64,
    lower_better: bool,
) {
    // Relative changes below this magnitude (in percent) are treated as noise.
    const NOISE_BAND_PCT: f64 = 2.0;

    if baseline == 0.0 && current == 0.0 {
        return;
    }

    // A zero baseline has no meaningful relative change; report 0%.
    let change_pct = if baseline == 0.0 {
        0.0
    } else {
        (current - baseline) / baseline * 100.0
    };

    // Direction of the move, interpreted according to the metric's polarity.
    let got_better = if lower_better {
        current < baseline
    } else {
        current > baseline
    };

    let verdict = if change_pct.abs() < NOISE_BAND_PCT {
        "="
    } else if got_better {
        "IMPROVED"
    } else {
        "REGRESSED"
    };

    diffs.push(MetricDiff {
        name: name.to_string(),
        baseline,
        current,
        change_pct,
        verdict,
    });
}
/// Prints `diffs` to stdout as a fixed-width table followed by a summary line.
///
/// `baseline_name` and `current_name` are echoed in the header. The summary
/// counts rows whose verdict is `"REGRESSED"` or `"IMPROVED"` and is coloured
/// with ANSI escapes (red when there is at least one regression, green when
/// there are only improvements).
pub fn render_diff(diffs: &[MetricDiff], baseline_name: &str, current_name: &str) {
    println!("\n=== CGP Profile Diff ===\n");
    println!(" Baseline: {baseline_name}");
    println!(" Current: {current_name}\n");
    println!(
        " {:30} {:>14} {:>14} {:>10} {:>10}",
        "Metric", "Baseline", "Current", "Change", "Verdict"
    );
    let rule = "-".repeat(82);
    println!(" {rule}");

    for row in diffs {
        // The percentage is pre-formatted so the sign and `%` suffix are
        // right-aligned together as one cell.
        let delta = format!("{:+.1}%", row.change_pct);
        println!(
            " {name:30} {base:>14.2} {cur:>14.2} {delta:>10} {verdict:>10}",
            name = row.name,
            base = row.baseline,
            cur = row.current,
            delta = delta,
            verdict = row.verdict
        );
    }

    // Tally both verdict kinds in a single pass.
    let (regressions, improvements) =
        diffs
            .iter()
            .fold((0usize, 0usize), |(worse, better), row| match row.verdict {
                "REGRESSED" => (worse + 1, better),
                "IMPROVED" => (worse, better + 1),
                _ => (worse, better),
            });

    println!();
    match (regressions, improvements) {
        (0, 0) => println!(" No significant changes."),
        (0, _) => println!(" \x1b[32m{improvements} improvement(s)\x1b[0m, no regressions"),
        _ => println!(" \x1b[31m{regressions} regression(s)\x1b[0m, {improvements} improvement(s)"),
    }
    println!();
}
pub fn run_diff(
baseline: Option<&str>,
current: Option<&str>,
_before: Option<&str>,
_after: Option<&str>,
json: bool,
) -> Result<()> {
let (baseline_path, current_path) = match (baseline, current) {
(Some(b), Some(c)) => (b, c),
_ => {
anyhow::bail!(
"Usage: cgp diff --baseline <file.json> --current <file.json>\n\
Or: cgp diff --before <commit> --after <commit> (not yet implemented)"
);
}
};
let start = std::time::Instant::now();
let baseline_profile = export::load_json(Path::new(baseline_path))?;
let current_profile = export::load_json(Path::new(current_path))?;
let diffs = diff_profiles(&baseline_profile, ¤t_profile);
if json {
println!("{}", serde_json::to_string_pretty(&diffs)?);
return Ok(());
}
render_diff(&diffs, baseline_path, current_path);
if baseline_profile.timing.samples > 1 && current_profile.timing.samples > 1 {
let detector = RegressionDetector::new();
let b_mean = baseline_profile.timing.wall_clock_time_us;
let b_std = baseline_profile.timing.stddev_us.max(b_mean * 0.01);
let c_mean = current_profile.timing.wall_clock_time_us;
let c_std = current_profile.timing.stddev_us.max(c_mean * 0.01);
let b_samples: Vec<f64> = (0..30)
.map(|i| b_mean + b_std * ((i as f64 - 15.0) / 15.0))
.collect();
let c_samples: Vec<f64> = (0..30)
.map(|i| c_mean + c_std * ((i as f64 - 15.0) / 15.0))
.collect();
let result = detector.compare(&b_samples, &c_samples);
println!(
" Statistical: {} (change {:.1}%, Cohen's d = {:.2})",
result.verdict, result.change_pct, result.effect_size_cohens_d
);
} else {
let b = baseline_profile.timing.wall_clock_time_us;
let c = current_profile.timing.wall_clock_time_us;
if b > 0.0 && c > 0.0 {
let change = (c - b) / b * 100.0;
let verdict = if change < -5.0 {
"IMPROVED"
} else if change > 5.0 {
"REGRESSED"
} else {
"NO_CHANGE"
};
println!(" Statistical: {verdict} (change {change:.1}%, single-sample)");
}
}
let elapsed = start.elapsed();
println!(
" Diff completed in {:.0}ms",
elapsed.as_secs_f64() * 1000.0
);
println!();
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::catalog::*;

    /// Builds a profile with only wall-clock time and TFLOPS populated; every
    /// other field takes its `Default` value.
    fn make_profile(time_us: f64, tflops: f64) -> FullProfile {
        FullProfile {
            version: "2.0".to_string(),
            timing: TimingMetrics {
                wall_clock_time_us: time_us,
                samples: 50,
                ..Default::default()
            },
            throughput: ThroughputMetrics {
                tflops,
                ..Default::default()
            },
            ..Default::default()
        }
    }

    /// Faster run with more throughput: both metrics are reported as improved.
    #[test]
    fn test_diff_improvement() {
        let baseline = make_profile(35.7, 7.5);
        let current = make_profile(23.2, 11.6);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diffs
            .iter()
            .find(|d| d.name == "wall_clock_time_us")
            .unwrap();
        assert_eq!(time_diff.verdict, "IMPROVED");
        assert!(time_diff.change_pct < -30.0);

        let tflops_diff = diffs.iter().find(|d| d.name == "tflops").unwrap();
        assert_eq!(tflops_diff.verdict, "IMPROVED");
    }

    /// Slower run with less throughput: both metrics are reported as regressed.
    #[test]
    fn test_diff_regression() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(35.7, 7.5);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diffs
            .iter()
            .find(|d| d.name == "wall_clock_time_us")
            .unwrap();
        assert_eq!(time_diff.verdict, "REGRESSED");

        let tflops_diff = diffs.iter().find(|d| d.name == "tflops").unwrap();
        assert_eq!(tflops_diff.verdict, "REGRESSED");
    }

    /// Changes under 2% fall inside the noise band and are reported as "=".
    #[test]
    fn test_diff_no_change() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(23.4, 11.5);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diffs
            .iter()
            .find(|d| d.name == "wall_clock_time_us")
            .unwrap();
        assert_eq!(time_diff.verdict, "=");

        let tflops_diff = diffs.iter().find(|d| d.name == "tflops").unwrap();
        assert_eq!(tflops_diff.verdict, "=");
    }

    /// Guards against the in-memory diff becoming unexpectedly expensive.
    #[test]
    fn test_diff_speed() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(35.7, 7.5);

        let start = std::time::Instant::now();
        for _ in 0..100 {
            let _ = diff_profiles(&baseline, &current);
        }
        let elapsed = start.elapsed();

        assert!(
            elapsed.as_millis() < 100,
            "100 diffs took {}ms",
            elapsed.as_millis()
        );
    }
}