Skip to main content

cgp/analysis/
diff.rs

1//! Profile diff: compare two CGP profiles and detect regressions.
2//! Spec section 2.6: cgp diff.
3//! Must complete in <100ms for two saved JSONs (FALSIFY-CGP-062).
4
5use crate::analysis::regression::RegressionDetector;
6use crate::metrics::catalog::FullProfile;
7use crate::metrics::export;
8use anyhow::Result;
9use serde::Serialize;
10use std::path::Path;
11
12/// Diff result for a single metric.
13#[derive(Debug, Serialize)]
14pub struct MetricDiff {
15    pub name: String,
16    pub baseline: f64,
17    pub current: f64,
18    pub change_pct: f64,
19    pub verdict: &'static str,
20}
21
22/// Compare two profiles and return metric diffs.
23pub fn diff_profiles(baseline: &FullProfile, current: &FullProfile) -> Vec<MetricDiff> {
24    let mut diffs = Vec::new();
25
26    // Timing
27    add_diff(
28        &mut diffs,
29        "wall_clock_time_us",
30        baseline.timing.wall_clock_time_us,
31        current.timing.wall_clock_time_us,
32        true, // lower is better
33    );
34
35    // Throughput
36    add_diff(
37        &mut diffs,
38        "tflops",
39        baseline.throughput.tflops,
40        current.throughput.tflops,
41        false, // higher is better
42    );
43    add_diff(
44        &mut diffs,
45        "bandwidth_gbps",
46        baseline.throughput.bandwidth_gbps,
47        current.throughput.bandwidth_gbps,
48        false,
49    );
50
51    // GPU compute
52    if let (Some(bg), Some(cg)) = (&baseline.gpu_compute, &current.gpu_compute) {
53        add_diff(
54            &mut diffs,
55            "sm_utilization_pct",
56            bg.sm_utilization_pct,
57            cg.sm_utilization_pct,
58            false,
59        );
60        add_diff(
61            &mut diffs,
62            "achieved_occupancy_pct",
63            bg.achieved_occupancy_pct,
64            cg.achieved_occupancy_pct,
65            false,
66        );
67        add_diff(
68            &mut diffs,
69            "warp_exec_efficiency_pct",
70            bg.warp_execution_efficiency_pct,
71            cg.warp_execution_efficiency_pct,
72            false,
73        );
74    }
75
76    // GPU memory
77    if let (Some(bm), Some(cm)) = (&baseline.gpu_memory, &current.gpu_memory) {
78        add_diff(
79            &mut diffs,
80            "l2_hit_rate_pct",
81            bm.l2_hit_rate_pct,
82            cm.l2_hit_rate_pct,
83            false,
84        );
85        add_diff(
86            &mut diffs,
87            "global_load_efficiency_pct",
88            bm.global_load_efficiency_pct,
89            cm.global_load_efficiency_pct,
90            false,
91        );
92    }
93
94    diffs
95}
96
97fn add_diff(
98    diffs: &mut Vec<MetricDiff>,
99    name: &str,
100    baseline: f64,
101    current: f64,
102    lower_better: bool,
103) {
104    if baseline == 0.0 && current == 0.0 {
105        return;
106    }
107    let change_pct = if baseline != 0.0 {
108        (current - baseline) / baseline * 100.0
109    } else {
110        0.0
111    };
112
113    let verdict = if change_pct.abs() < 2.0 {
114        "="
115    } else if lower_better {
116        if current < baseline {
117            "IMPROVED"
118        } else {
119            "REGRESSED"
120        }
121    } else if current > baseline {
122        "IMPROVED"
123    } else {
124        "REGRESSED"
125    };
126
127    diffs.push(MetricDiff {
128        name: name.to_string(),
129        baseline,
130        current,
131        change_pct,
132        verdict,
133    });
134}
135
136/// Render diff to stdout.
137pub fn render_diff(diffs: &[MetricDiff], baseline_name: &str, current_name: &str) {
138    println!("\n=== CGP Profile Diff ===\n");
139    println!("  Baseline: {baseline_name}");
140    println!("  Current:  {current_name}\n");
141
142    println!(
143        "  {:30} {:>14} {:>14} {:>10} {:>10}",
144        "Metric", "Baseline", "Current", "Change", "Verdict"
145    );
146    println!("  {}", "-".repeat(82));
147
148    for d in diffs {
149        let change_str = format!("{:+.1}%", d.change_pct);
150        println!(
151            "  {:30} {:>14.2} {:>14.2} {:>10} {:>10}",
152            d.name, d.baseline, d.current, change_str, d.verdict
153        );
154    }
155
156    // Summary
157    let regressions = diffs.iter().filter(|d| d.verdict == "REGRESSED").count();
158    let improvements = diffs.iter().filter(|d| d.verdict == "IMPROVED").count();
159    println!();
160    if regressions > 0 {
161        println!("  \x1b[31m{regressions} regression(s)\x1b[0m, {improvements} improvement(s)");
162    } else if improvements > 0 {
163        println!("  \x1b[32m{improvements} improvement(s)\x1b[0m, no regressions");
164    } else {
165        println!("  No significant changes.");
166    }
167    println!();
168}
169
170/// Run the `cgp diff` command.
171pub fn run_diff(
172    baseline: Option<&str>,
173    current: Option<&str>,
174    _before: Option<&str>,
175    _after: Option<&str>,
176    json: bool,
177) -> Result<()> {
178    let (baseline_path, current_path) = resolve_diff_paths(baseline, current)?;
179
180    let start = std::time::Instant::now();
181    let baseline_profile = export::load_json(Path::new(baseline_path))?;
182    let current_profile = export::load_json(Path::new(current_path))?;
183    let diffs = diff_profiles(&baseline_profile, &current_profile);
184
185    if json {
186        println!("{}", serde_json::to_string_pretty(&diffs)?);
187        return Ok(());
188    }
189
190    render_diff(&diffs, baseline_path, current_path);
191    print_statistical_summary(&baseline_profile, &current_profile);
192
193    let elapsed = start.elapsed();
194    println!(
195        "  Diff completed in {:.0}ms",
196        elapsed.as_secs_f64() * 1000.0
197    );
198    println!();
199
200    Ok(())
201}
202
203/// Resolve the `--baseline`/`--current` pair or bail with usage text.
204fn resolve_diff_paths<'a>(
205    baseline: Option<&'a str>,
206    current: Option<&'a str>,
207) -> Result<(&'a str, &'a str)> {
208    match (baseline, current) {
209        (Some(b), Some(c)) => Ok((b, c)),
210        _ => anyhow::bail!(
211            "Usage: cgp diff --baseline <file.json> --current <file.json>\n\
212             Or: cgp diff --before <commit> --after <commit> (not yet implemented)"
213        ),
214    }
215}
216
217/// Print statistical regression summary, using real multi-sample analysis when
218/// both profiles have `samples > 1`, otherwise a single-sample change direction.
219fn print_statistical_summary(baseline: &FullProfile, current: &FullProfile) {
220    if baseline.timing.samples > 1 && current.timing.samples > 1 {
221        print_multi_sample_regression(baseline, current);
222    } else {
223        print_single_sample_compare(baseline, current);
224    }
225}
226
227fn print_multi_sample_regression(baseline: &FullProfile, current: &FullProfile) {
228    let detector = RegressionDetector::new();
229    let b_samples = synth_samples(
230        baseline.timing.wall_clock_time_us,
231        baseline.timing.stddev_us,
232    );
233    let c_samples = synth_samples(current.timing.wall_clock_time_us, current.timing.stddev_us);
234    let result = detector.compare(&b_samples, &c_samples);
235    println!(
236        "  Statistical: {} (change {:.1}%, Cohen's d = {:.2})",
237        result.verdict, result.change_pct, result.effect_size_cohens_d
238    );
239}
240
/// Synthesize 30 deterministic samples whose sample mean is `mean` and whose
/// sample standard deviation (n - 1 denominator) is the requested spread.
///
/// The spread is `stddev`, floored at 1% of `mean` so a profile that recorded
/// no variance still yields a non-degenerate sample set.
///
/// The samples lie on an evenly spaced grid centred on `mean`. Centring
/// matters: an off-centre grid shifts the sample mean away from `mean`, and an
/// unscaled grid under-represents the variance, which inflates any effect size
/// computed from the samples.
fn synth_samples(mean: f64, stddev: f64) -> Vec<f64> {
    const N: usize = 30;

    let spread = stddev.max(mean * 0.01);
    // Midpoint of the indices 0..N, so offsets are symmetric around zero.
    let centre = (N as f64 - 1.0) / 2.0;
    // Sample standard deviation of the integers 0..N is sqrt(N * (N + 1) / 12).
    let unit_sd = ((N * (N + 1)) as f64 / 12.0).sqrt();

    (0..N)
        .map(|i| mean + spread * ((i as f64 - centre) / unit_sd))
        .collect()
}
247
248fn print_single_sample_compare(baseline: &FullProfile, current: &FullProfile) {
249    let b = baseline.timing.wall_clock_time_us;
250    let c = current.timing.wall_clock_time_us;
251    if b <= 0.0 || c <= 0.0 {
252        return;
253    }
254    let change = (c - b) / b * 100.0;
255    let verdict = if change < -5.0 {
256        "IMPROVED"
257    } else if change > 5.0 {
258        "REGRESSED"
259    } else {
260        "NO_CHANGE"
261    };
262    println!("  Statistical: {verdict} (change {change:.1}%, single-sample)");
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::catalog::*;

    /// Build a 50-sample profile with only wall-clock time and TFLOPS set.
    fn make_profile(time_us: f64, tflops: f64) -> FullProfile {
        let timing = TimingMetrics {
            wall_clock_time_us: time_us,
            samples: 50,
            ..Default::default()
        };
        let throughput = ThroughputMetrics {
            tflops,
            ..Default::default()
        };
        FullProfile {
            version: "2.0".to_string(),
            timing,
            throughput,
            ..Default::default()
        }
    }

    /// Look up the diff entry for `name`, panicking when it is absent.
    fn metric<'a>(diffs: &'a [MetricDiff], name: &str) -> &'a MetricDiff {
        diffs.iter().find(|d| d.name == name).unwrap()
    }

    #[test]
    fn test_diff_improvement() {
        let slow = make_profile(35.7, 7.5);
        let fast = make_profile(23.2, 11.6);
        let diffs = diff_profiles(&slow, &fast);

        // Lower wall-clock time is better.
        let time_diff = metric(&diffs, "wall_clock_time_us");
        assert_eq!(time_diff.verdict, "IMPROVED");
        assert!(time_diff.change_pct < -30.0);

        // Higher throughput is better.
        assert_eq!(metric(&diffs, "tflops").verdict, "IMPROVED");
    }

    #[test]
    fn test_diff_regression() {
        let fast = make_profile(23.2, 11.6);
        let slow = make_profile(35.7, 7.5);
        let diffs = diff_profiles(&fast, &slow);

        assert_eq!(metric(&diffs, "wall_clock_time_us").verdict, "REGRESSED");
    }

    #[test]
    fn test_diff_no_change() {
        let before = make_profile(23.2, 11.6);
        let after = make_profile(23.4, 11.5);
        let diffs = diff_profiles(&before, &after);

        // A change below 2% is reported as "=".
        assert_eq!(metric(&diffs, "wall_clock_time_us").verdict, "=");
    }

    /// FALSIFY-CGP-062: diff must complete in <100ms.
    #[test]
    fn test_diff_speed() {
        let before = make_profile(23.2, 11.6);
        let after = make_profile(35.7, 7.5);

        let timer = std::time::Instant::now();
        for _ in 0..100 {
            let _ = diff_profiles(&before, &after);
        }
        let total_ms = timer.elapsed().as_millis();

        // 100 diffs should take << 100ms
        assert!(total_ms < 100, "100 diffs took {}ms", total_ms);
    }
}
347}