1use crate::blame::{BlameRequest, BlameUseCase};
2use anyhow::Context;
3use perfgate_domain::{BinaryBlame, DependencyChangeType};
4use perfgate_types::{CompareReceipt, Metric, MetricStatus};
5use std::fs;
6use std::path::PathBuf;
7
/// Input parameters for [`ExplainUseCase::execute`].
pub struct ExplainRequest {
    /// Path to the JSON compare receipt produced by a prior compare run.
    pub compare: PathBuf,
    /// Optional baseline `Cargo.lock` for binary-blame analysis.
    pub baseline_lock: Option<PathBuf>,
    /// Optional current `Cargo.lock`; blame only runs when both locks are supplied.
    pub current_lock: Option<PathBuf>,
}
13
/// Result of an explain run: a human-readable Markdown report.
pub struct ExplainOutcome {
    /// The fully rendered Markdown document.
    pub markdown: String,
}
17
18pub struct ExplainUseCase;
19
20impl ExplainUseCase {
21 pub fn execute(&self, req: ExplainRequest) -> anyhow::Result<ExplainOutcome> {
22 let content = fs::read_to_string(&req.compare)
23 .with_context(|| format!("failed to read {:?}", req.compare))?;
24 let compare: CompareReceipt =
25 serde_json::from_str(&content).context("failed to parse JSON")?;
26
27 let mut md = String::new();
28 md.push_str(&format!(
29 "# Performance Analysis for `{}`\n\n",
30 compare.bench.name
31 ));
32
33 if compare.verdict.status == perfgate_types::VerdictStatus::Pass {
34 md.push_str("✅ **Great news!** No significant performance regressions were detected in this run.\n");
35 return Ok(ExplainOutcome { markdown: md });
36 }
37
38 md.push_str("⚠️ **Performance Regressions Detected**\n\n");
39 md.push_str("The following metrics exceeded their budgets. Below are automated playbooks to help diagnose and resolve the issues:\n\n");
40
41 let blame = if let (Some(base), Some(curr)) = (req.baseline_lock, req.current_lock) {
42 let blame_usecase = BlameUseCase;
43 blame_usecase
44 .execute(BlameRequest {
45 baseline_lock: base,
46 current_lock: curr,
47 })
48 .ok()
49 .map(|o| o.blame)
50 } else {
51 None
52 };
53
54 for (metric, delta) in &compare.deltas {
55 if delta.status == MetricStatus::Fail || delta.status == MetricStatus::Warn {
56 let threshold = compare
57 .budgets
58 .get(metric)
59 .map(|b| {
60 if delta.status == MetricStatus::Fail {
61 b.threshold
62 } else {
63 b.warn_threshold
64 }
65 })
66 .unwrap_or(0.0);
67 md.push_str(&format!("## {}\n", metric.as_str()));
68 md.push_str(&format!(
69 "**Regression**: {:.2}% (Threshold: {:.2}%)\n\n",
70 delta.regression * 100.0,
71 threshold * 100.0
72 ));
73 md.push_str(&Self::playbook_for_metric(metric, blame.as_ref()));
74 md.push('\n');
75 }
76 }
77
78 md.push_str("---\n\n");
79 md.push_str("### 🤖 LLM Prompt\n");
80 md.push_str("Copy the text below and paste it into an LLM (like Gemini, ChatGPT, or Claude) along with your PR diff to get a detailed explanation:\n\n");
81 md.push_str("```text\n");
82 md.push_str("Act as a senior performance engineer. I have a performance regression in my pull request.\n\n");
83 md.push_str(&format!("Benchmark: {}\n", compare.bench.name));
84 for (metric, delta) in &compare.deltas {
85 if delta.status == MetricStatus::Fail || delta.status == MetricStatus::Warn {
86 md.push_str(&format!(
87 "- {} degraded by {:.2}%\n",
88 metric.as_str(),
89 delta.regression * 100.0
90 ));
91 }
92 }
93
94 if let Some(b) = &blame {
95 md.push_str("\nDetected Dependency Changes (Binary Blame):\n");
96 for change in &b.changes {
97 match change.change_type {
98 DependencyChangeType::Added => {
99 md.push_str(&format!(
100 " - Added: {} v{}\n",
101 change.name,
102 change.new_version.as_deref().unwrap_or("?")
103 ));
104 }
105 DependencyChangeType::Removed => {
106 md.push_str(&format!(
107 " - Removed: {} v{}\n",
108 change.name,
109 change.old_version.as_deref().unwrap_or("?")
110 ));
111 }
112 DependencyChangeType::Updated => {
113 md.push_str(&format!(
114 " - Updated: {} ({} -> {})\n",
115 change.name,
116 change.old_version.as_deref().unwrap_or("?"),
117 change.new_version.as_deref().unwrap_or("?")
118 ));
119 }
120 }
121 }
122 }
123
124 md.push_str("\nPlease analyze the attached code diff and explain what changes might have caused these specific metric regressions. Suggest code optimizations to fix the issue.\n");
125 md.push_str("```\n");
126
127 Ok(ExplainOutcome { markdown: md })
128 }
129
130 fn playbook_for_metric(metric: &Metric, blame: Option<&BinaryBlame>) -> String {
131 match metric {
132 Metric::WallMs => "### Wall Time Playbook\n- **Check for blocking I/O**: Are you doing disk or network operations on the main thread?\n- **Algorithm Complexity**: Did you add nested loops or expensive sorts?\n- **Lock Contention**: Check for deadlocks or heavy mutex usage in concurrent code.".to_string(),
133 Metric::CpuMs => "### CPU Time Playbook\n- **Hot Loops**: Profile the code (e.g. using `perf` or `flamegraph`) to find where CPU time is spent.\n- **Allocation Overhead**: Did you add unnecessary clones or heap allocations inside a loop?\n- **Inlining**: Ensure small, frequently called functions are inlined.".to_string(),
134 Metric::MaxRssKb => "### Peak Memory (RSS) Playbook\n- **Memory Leaks**: Check if you are retaining references to objects that should be dropped.\n- **Buffer Sizing**: Are you pre-allocating extremely large buffers? Consider streaming or chunking data.\n- **Data Structures**: Can you use more memory-efficient data structures (e.g. `Box<[T]>` instead of `Vec<T>`)?".to_string(),
135 Metric::IoReadBytes => "### Disk Read Playbook\n- **Redundant Reads**: Are you reading the same file multiple times?\n- **Buffering**: Use buffered readers (`BufReader`) to reduce syscalls.\n- **Lazy Loading**: Delay reading file contents until strictly necessary.".to_string(),
136 Metric::IoWriteBytes => "### Disk Write Playbook\n- **Redundant Writes**: Can you batch writes in memory before flushing to disk?\n- **Buffering**: Use `BufWriter` for many small writes.\n- **Log Level**: Did you accidentally leave verbose logging enabled?".to_string(),
137 Metric::NetworkPackets => "### Network Playbook\n- **Batching**: Are you making N+1 API queries? Consolidate them into a bulk endpoint.\n- **Caching**: Cache immutable remote resources instead of fetching them repeatedly.\n- **Connection Pooling**: Are you opening a new TCP connection for every request? Reuse connections.".to_string(),
138 Metric::CtxSwitches => "### Context Switch Playbook\n- **Thread Thrashing**: Are you spawning too many threads for CPU-bound work? (Match thread count to physical cores).\n- **Async Yielding**: Are you yielding too often in an async executor?\n- **Lock Contention**: High context switches often point to threads repeatedly waking up and going back to sleep on a lock.".to_string(),
139 Metric::PageFaults => "### Page Faults Playbook\n- **Memory Thrashing**: You might be allocating memory faster than the OS can provide physical pages. Pre-allocate and reuse memory buffers.\n- **Memory Mapping**: If using `mmap`, sequential access is better than random access for triggering pre-fetching.".to_string(),
140 Metric::BinaryBytes => {
141 let mut playbook = "### Binary Size Playbook\n- **Dependency Bloat**: Run `cargo tree` to see if a heavy dependency was introduced. Use the perfgate Binary Blame feature.\n- **Monomorphization**: Heavy use of generics can lead to code bloat. Try using trait objects (`dyn Trait`) in cold paths.\n- **Debug Info**: Ensure you are stripping debug symbols in release builds.".to_string();
142 if let Some(b) = blame {
143 playbook.push_str("\n\n**Binary Blame Analysis**:\n");
144 if b.changes.is_empty() {
145 playbook.push_str("No dependency changes detected in Cargo.lock.\n");
146 } else {
147 playbook.push_str(&format!("Detected {} dependency changes:\n", b.changes.len()));
148 for change in b.changes.iter().take(10) {
149 playbook.push_str(&format!("- {} ({:?})\n", change.name, change.change_type));
150 }
151 if b.changes.len() > 10 {
152 playbook.push_str(&format!("- ... and {} more.\n", b.changes.len() - 10));
153 }
154 }
155 }
156 playbook
157 },
158 Metric::ThroughputPerS => "### Throughput Playbook\n- **Bottlenecks**: A drop in throughput usually indicates a bottleneck in CPU or I/O. Consult the Wall Time and CPU playbooks.\n- **Concurrency Limit**: Check if a semaphore or connection pool is artificially limiting concurrent work units.".to_string(),
159 Metric::EnergyUj => "### Energy Efficiency Playbook\n- **Busy Waiting**: Are you using `spin` loops? Use OS-backed blocking primitives instead.\n- **High CPU Utilization**: Energy correlates strongly with CPU time. Optimize your algorithms to do less work.\n- **Polling**: Switch from polling models to event-driven (interrupt-based) architectures.".to_string(),
160 }
161 }
162}