1use crate::analysis::regression::RegressionDetector;
6use crate::metrics::catalog::FullProfile;
7use crate::metrics::export;
8use anyhow::Result;
9use serde::Serialize;
10use std::path::Path;
11
/// One row of a profile comparison: a single metric's value in the baseline
/// and current profiles, together with the derived change and classification.
#[derive(Debug, Serialize)]
pub struct MetricDiff {
    /// Metric identifier, e.g. `"wall_clock_time_us"`.
    pub name: String,
    /// Value read from the baseline profile.
    pub baseline: f64,
    /// Value read from the current profile.
    pub current: f64,
    /// Change from baseline to current, in percent of the baseline value
    /// (`0.0` when the baseline is zero and no ratio can be formed).
    pub change_pct: f64,
    /// Classification of the change: `"="`, `"IMPROVED"` or `"REGRESSED"`.
    pub verdict: &'static str,
}
21
22pub fn diff_profiles(baseline: &FullProfile, current: &FullProfile) -> Vec<MetricDiff> {
24 let mut diffs = Vec::new();
25
26 add_diff(
28 &mut diffs,
29 "wall_clock_time_us",
30 baseline.timing.wall_clock_time_us,
31 current.timing.wall_clock_time_us,
32 true, );
34
35 add_diff(
37 &mut diffs,
38 "tflops",
39 baseline.throughput.tflops,
40 current.throughput.tflops,
41 false, );
43 add_diff(
44 &mut diffs,
45 "bandwidth_gbps",
46 baseline.throughput.bandwidth_gbps,
47 current.throughput.bandwidth_gbps,
48 false,
49 );
50
51 if let (Some(bg), Some(cg)) = (&baseline.gpu_compute, ¤t.gpu_compute) {
53 add_diff(
54 &mut diffs,
55 "sm_utilization_pct",
56 bg.sm_utilization_pct,
57 cg.sm_utilization_pct,
58 false,
59 );
60 add_diff(
61 &mut diffs,
62 "achieved_occupancy_pct",
63 bg.achieved_occupancy_pct,
64 cg.achieved_occupancy_pct,
65 false,
66 );
67 add_diff(
68 &mut diffs,
69 "warp_exec_efficiency_pct",
70 bg.warp_execution_efficiency_pct,
71 cg.warp_execution_efficiency_pct,
72 false,
73 );
74 }
75
76 if let (Some(bm), Some(cm)) = (&baseline.gpu_memory, ¤t.gpu_memory) {
78 add_diff(
79 &mut diffs,
80 "l2_hit_rate_pct",
81 bm.l2_hit_rate_pct,
82 cm.l2_hit_rate_pct,
83 false,
84 );
85 add_diff(
86 &mut diffs,
87 "global_load_efficiency_pct",
88 bm.global_load_efficiency_pct,
89 cm.global_load_efficiency_pct,
90 false,
91 );
92 }
93
94 diffs
95}
96
97fn add_diff(
98 diffs: &mut Vec<MetricDiff>,
99 name: &str,
100 baseline: f64,
101 current: f64,
102 lower_better: bool,
103) {
104 if baseline == 0.0 && current == 0.0 {
105 return;
106 }
107 let change_pct = if baseline != 0.0 {
108 (current - baseline) / baseline * 100.0
109 } else {
110 0.0
111 };
112
113 let verdict = if change_pct.abs() < 2.0 {
114 "="
115 } else if lower_better {
116 if current < baseline {
117 "IMPROVED"
118 } else {
119 "REGRESSED"
120 }
121 } else if current > baseline {
122 "IMPROVED"
123 } else {
124 "REGRESSED"
125 };
126
127 diffs.push(MetricDiff {
128 name: name.to_string(),
129 baseline,
130 current,
131 change_pct,
132 verdict,
133 });
134}
135
136pub fn render_diff(diffs: &[MetricDiff], baseline_name: &str, current_name: &str) {
138 println!("\n=== CGP Profile Diff ===\n");
139 println!(" Baseline: {baseline_name}");
140 println!(" Current: {current_name}\n");
141
142 println!(
143 " {:30} {:>14} {:>14} {:>10} {:>10}",
144 "Metric", "Baseline", "Current", "Change", "Verdict"
145 );
146 println!(" {}", "-".repeat(82));
147
148 for d in diffs {
149 let change_str = format!("{:+.1}%", d.change_pct);
150 println!(
151 " {:30} {:>14.2} {:>14.2} {:>10} {:>10}",
152 d.name, d.baseline, d.current, change_str, d.verdict
153 );
154 }
155
156 let regressions = diffs.iter().filter(|d| d.verdict == "REGRESSED").count();
158 let improvements = diffs.iter().filter(|d| d.verdict == "IMPROVED").count();
159 println!();
160 if regressions > 0 {
161 println!(" \x1b[31m{regressions} regression(s)\x1b[0m, {improvements} improvement(s)");
162 } else if improvements > 0 {
163 println!(" \x1b[32m{improvements} improvement(s)\x1b[0m, no regressions");
164 } else {
165 println!(" No significant changes.");
166 }
167 println!();
168}
169
170pub fn run_diff(
172 baseline: Option<&str>,
173 current: Option<&str>,
174 _before: Option<&str>,
175 _after: Option<&str>,
176 json: bool,
177) -> Result<()> {
178 let (baseline_path, current_path) = resolve_diff_paths(baseline, current)?;
179
180 let start = std::time::Instant::now();
181 let baseline_profile = export::load_json(Path::new(baseline_path))?;
182 let current_profile = export::load_json(Path::new(current_path))?;
183 let diffs = diff_profiles(&baseline_profile, ¤t_profile);
184
185 if json {
186 println!("{}", serde_json::to_string_pretty(&diffs)?);
187 return Ok(());
188 }
189
190 render_diff(&diffs, baseline_path, current_path);
191 print_statistical_summary(&baseline_profile, ¤t_profile);
192
193 let elapsed = start.elapsed();
194 println!(
195 " Diff completed in {:.0}ms",
196 elapsed.as_secs_f64() * 1000.0
197 );
198 println!();
199
200 Ok(())
201}
202
203fn resolve_diff_paths<'a>(
205 baseline: Option<&'a str>,
206 current: Option<&'a str>,
207) -> Result<(&'a str, &'a str)> {
208 match (baseline, current) {
209 (Some(b), Some(c)) => Ok((b, c)),
210 _ => anyhow::bail!(
211 "Usage: cgp diff --baseline <file.json> --current <file.json>\n\
212 Or: cgp diff --before <commit> --after <commit> (not yet implemented)"
213 ),
214 }
215}
216
217fn print_statistical_summary(baseline: &FullProfile, current: &FullProfile) {
220 if baseline.timing.samples > 1 && current.timing.samples > 1 {
221 print_multi_sample_regression(baseline, current);
222 } else {
223 print_single_sample_compare(baseline, current);
224 }
225}
226
227fn print_multi_sample_regression(baseline: &FullProfile, current: &FullProfile) {
228 let detector = RegressionDetector::new();
229 let b_samples = synth_samples(
230 baseline.timing.wall_clock_time_us,
231 baseline.timing.stddev_us,
232 );
233 let c_samples = synth_samples(current.timing.wall_clock_time_us, current.timing.stddev_us);
234 let result = detector.compare(&b_samples, &c_samples);
235 println!(
236 " Statistical: {} (change {:.1}%, Cohen's d = {:.2})",
237 result.verdict, result.change_pct, result.effect_size_cohens_d
238 );
239}
240
/// Builds a deterministic set of 30 evenly spaced samples whose sample mean
/// is `mean` and whose sample standard deviation (n - 1 denominator) is the
/// effective spread.
///
/// The effective spread is `stddev`, floored at 1% of `mean` so that a
/// profile reporting zero variance still produces non-degenerate samples.
///
/// The previous implementation used offsets `(i - 15) / 15` for `i` in
/// `0..30`, i.e. the asymmetric range `[-1, 14/15]`. That placed the
/// synthetic mean `std / 30` below the requested mean and produced a spread
/// of only about `0.59 * std`, which inflates any effect size computed from
/// the samples. Offsets are now centred and scaled to unit sample deviation.
fn synth_samples(mean: f64, stddev: f64) -> Vec<f64> {
    const SAMPLE_COUNT: usize = 30;

    let std = stddev.max(mean * 0.01);

    // Centre of the index range, so offsets are symmetric around zero.
    let centre = (SAMPLE_COUNT as f64 - 1.0) / 2.0;
    // Sample standard deviation of the raw offsets `i - centre`:
    // sum((i - centre)^2) = n(n^2 - 1) / 12, divided by (n - 1) gives
    // n(n + 1) / 12.
    let unit_sd = ((SAMPLE_COUNT * (SAMPLE_COUNT + 1)) as f64 / 12.0).sqrt();

    (0..SAMPLE_COUNT)
        .map(|i| mean + std * ((i as f64 - centre) / unit_sd))
        .collect()
}
247
248fn print_single_sample_compare(baseline: &FullProfile, current: &FullProfile) {
249 let b = baseline.timing.wall_clock_time_us;
250 let c = current.timing.wall_clock_time_us;
251 if b <= 0.0 || c <= 0.0 {
252 return;
253 }
254 let change = (c - b) / b * 100.0;
255 let verdict = if change < -5.0 {
256 "IMPROVED"
257 } else if change > 5.0 {
258 "REGRESSED"
259 } else {
260 "NO_CHANGE"
261 };
262 println!(" Statistical: {verdict} (change {change:.1}%, single-sample)");
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::catalog::*;

    /// Builds a profile carrying only a wall-clock time and a TFLOPS figure;
    /// every other field keeps its default value.
    fn make_profile(time_us: f64, tflops: f64) -> FullProfile {
        FullProfile {
            version: "2.0".to_string(),
            timing: TimingMetrics {
                wall_clock_time_us: time_us,
                samples: 50,
                ..Default::default()
            },
            throughput: ThroughputMetrics {
                tflops,
                ..Default::default()
            },
            ..Default::default()
        }
    }

    /// Lower time and higher throughput must both be classified as improved.
    #[test]
    fn test_diff_improvement() {
        let baseline = make_profile(35.7, 7.5);
        let current = make_profile(23.2, 11.6);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diffs
            .iter()
            .find(|d| d.name == "wall_clock_time_us")
            .unwrap();
        assert_eq!(time_diff.verdict, "IMPROVED");
        assert!(time_diff.change_pct < -30.0);

        let tflops_diff = diffs.iter().find(|d| d.name == "tflops").unwrap();
        assert_eq!(tflops_diff.verdict, "IMPROVED");
    }

    /// Higher time and lower throughput must both be classified as regressed.
    #[test]
    fn test_diff_regression() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(35.7, 7.5);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diffs
            .iter()
            .find(|d| d.name == "wall_clock_time_us")
            .unwrap();
        assert_eq!(time_diff.verdict, "REGRESSED");

        let tflops_diff = diffs.iter().find(|d| d.name == "tflops").unwrap();
        assert_eq!(tflops_diff.verdict, "REGRESSED");
    }

    /// Changes below the 2% noise band are reported as "=".
    #[test]
    fn test_diff_no_change() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(23.4, 11.5);
        let diffs = diff_profiles(&baseline, &current);

        let time_diff = diffs
            .iter()
            .find(|d| d.name == "wall_clock_time_us")
            .unwrap();
        assert_eq!(time_diff.verdict, "=");

        let tflops_diff = diffs.iter().find(|d| d.name == "tflops").unwrap();
        assert_eq!(tflops_diff.verdict, "=");
    }

    /// Loose guard against accidental slowness: 100 diffs within 100 ms.
    #[test]
    fn test_diff_speed() {
        let baseline = make_profile(23.2, 11.6);
        let current = make_profile(35.7, 7.5);

        let start = std::time::Instant::now();
        for _ in 0..100 {
            let _ = diff_profiles(&baseline, &current);
        }
        let elapsed = start.elapsed();
        assert!(
            elapsed.as_millis() < 100,
            "100 diffs took {}ms",
            elapsed.as_millis()
        );
    }
}