1use serde::{Deserialize, Serialize};
4
5use super::suite::{BaselineEntry, BaselineReport, OptimizationSuite, WorkloadConfig};
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct BottleneckEntry {
10 pub workload: String,
12 pub size: usize,
14 pub efficiency: f64,
16 pub gflops: f64,
18 pub recommendation: String,
20 pub severity: BottleneckSeverity,
22}
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26pub enum BottleneckSeverity {
27 Critical,
29 Severe,
31 Moderate,
33 Unstable,
35}
36
37#[derive(Debug, Clone, Default, Serialize, Deserialize)]
39pub struct BottleneckAnalysis {
40 pub critical: Vec<BottleneckEntry>,
42 pub severe: Vec<BottleneckEntry>,
44 pub moderate: Vec<BottleneckEntry>,
46 pub unstable: Vec<BottleneckEntry>,
48 pub summary: AnalysisSummary,
50}
51
52#[derive(Debug, Clone, Default, Serialize, Deserialize)]
54pub struct AnalysisSummary {
55 pub total_configs: usize,
57 pub critical_count: usize,
59 pub severe_count: usize,
61 pub moderate_count: usize,
63 pub unstable_count: usize,
65 pub avg_efficiency: f64,
67 pub min_efficiency: f64,
69 pub max_efficiency: f64,
71}
72
73impl OptimizationSuite {
74 pub fn analyze_bottlenecks(&self, baseline: &BaselineReport) -> BottleneckAnalysis {
76 let mut analysis = BottleneckAnalysis::default();
77 let mut efficiencies = Vec::new();
78
79 for entry in &baseline.entries {
80 let workload = self.workloads.iter().find(|w| w.name == entry.workload);
81
82 let efficiency = entry.efficiency;
83 efficiencies.push(efficiency);
84
85 if efficiency < 0.25 {
87 analysis.critical.push(BottleneckEntry {
88 workload: entry.workload.clone(),
89 size: entry.size,
90 efficiency,
91 gflops: entry.gflops,
92 recommendation: Self::recommend_optimization(
93 workload,
94 entry,
95 BottleneckSeverity::Critical,
96 ),
97 severity: BottleneckSeverity::Critical,
98 });
99 } else if efficiency < 0.50 {
100 analysis.severe.push(BottleneckEntry {
101 workload: entry.workload.clone(),
102 size: entry.size,
103 efficiency,
104 gflops: entry.gflops,
105 recommendation: Self::recommend_optimization(
106 workload,
107 entry,
108 BottleneckSeverity::Severe,
109 ),
110 severity: BottleneckSeverity::Severe,
111 });
112 } else if efficiency < 0.75 {
113 analysis.moderate.push(BottleneckEntry {
114 workload: entry.workload.clone(),
115 size: entry.size,
116 efficiency,
117 gflops: entry.gflops,
118 recommendation: Self::recommend_optimization(
119 workload,
120 entry,
121 BottleneckSeverity::Moderate,
122 ),
123 severity: BottleneckSeverity::Moderate,
124 });
125 }
126
127 if entry.cv_percent > 15.0 {
129 analysis.unstable.push(BottleneckEntry {
130 workload: entry.workload.clone(),
131 size: entry.size,
132 efficiency,
133 gflops: entry.gflops,
134 recommendation: format!(
135 "High variance (CV={:.1}%) - check CPU governor with PERF-003 pattern, \
136 or reduce system load during benchmarks",
137 entry.cv_percent
138 ),
139 severity: BottleneckSeverity::Unstable,
140 });
141 }
142 }
143
144 analysis.summary = AnalysisSummary {
146 total_configs: baseline.entries.len(),
147 critical_count: analysis.critical.len(),
148 severe_count: analysis.severe.len(),
149 moderate_count: analysis.moderate.len(),
150 unstable_count: analysis.unstable.len(),
151 avg_efficiency: if efficiencies.is_empty() {
152 0.0
153 } else {
154 efficiencies.iter().sum::<f64>() / efficiencies.len() as f64
155 },
156 min_efficiency: efficiencies.iter().cloned().fold(f64::INFINITY, f64::min),
157 max_efficiency: efficiencies
158 .iter()
159 .cloned()
160 .fold(f64::NEG_INFINITY, f64::max),
161 };
162
163 analysis
164 }
165
166 fn recommend_optimization(
167 workload: Option<&WorkloadConfig>,
168 entry: &BaselineEntry,
169 severity: BottleneckSeverity,
170 ) -> String {
171 let is_memory_bound = workload.map(|w| w.memory_bound).unwrap_or(false);
172 let is_large = entry.size > 1_000_000;
173 let is_very_large = entry.size > 4_000_000;
174
175 match severity {
176 BottleneckSeverity::Critical => {
177 if entry.gflops < 1.0 {
178 "Critical: Near-zero throughput - verify SIMD codegen with `cargo asm`, \
179 check for scalar fallback"
180 .to_string()
181 } else if is_memory_bound && is_very_large {
182 "Critical: Memory bandwidth limited at large size - implement cache-aware \
183 tiling (PERF-001 pattern), consider prefetching"
184 .to_string()
185 } else {
186 "Critical: Profile with `perf record` or `renacer` to identify hotspot, \
187 check for branch mispredictions"
188 .to_string()
189 }
190 }
191 BottleneckSeverity::Severe => {
192 if is_memory_bound && is_large {
193 "Consider cache-aware tiling (PERF-001 pattern) for large memory-bound \
194 operations"
195 .to_string()
196 } else if entry.cv_percent > 10.0 {
197 format!(
198 "High variance (CV={:.1}%) - set CPU governor to 'performance' \
199 (PERF-003 pattern)",
200 entry.cv_percent
201 )
202 } else {
203 "Profile with `perf stat` to check IPC and cache misses".to_string()
204 }
205 }
206 BottleneckSeverity::Moderate => {
207 if is_memory_bound {
208 "Consider memory access pattern optimization (coalescing, prefetching)"
209 .to_string()
210 } else {
211 "Near optimal - minor gains possible with micro-optimizations".to_string()
212 }
213 }
214 BottleneckSeverity::Unstable => {
215 "Reduce measurement variance before optimizing".to_string()
216 }
217 }
218 }
219}
220
221impl BottleneckAnalysis {
222 pub fn format_report(&self) -> String {
224 let mut report = String::new();
225
226 report.push_str("# Bottleneck Analysis Report\n\n");
227 report.push_str(&format!(
228 "**Configurations Analyzed**: {}\n",
229 self.summary.total_configs
230 ));
231 report.push_str(&format!(
232 "**Average Efficiency**: {:.1}%\n",
233 self.summary.avg_efficiency * 100.0
234 ));
235 report.push_str(&format!(
236 "**Efficiency Range**: {:.1}% - {:.1}%\n\n",
237 self.summary.min_efficiency * 100.0,
238 self.summary.max_efficiency * 100.0
239 ));
240
241 if !self.critical.is_empty() {
242 report.push_str("## Critical Bottlenecks (< 25% efficiency)\n\n");
243 for b in &self.critical {
244 report.push_str(&format!(
245 "- **{}** @ {} elements: {:.1}% efficiency ({:.1} GFLOP/s)\n - {}\n\n",
246 b.workload,
247 b.size,
248 b.efficiency * 100.0,
249 b.gflops,
250 b.recommendation
251 ));
252 }
253 }
254
255 if !self.severe.is_empty() {
256 report.push_str("## Severe Bottlenecks (< 50% efficiency)\n\n");
257 for b in &self.severe {
258 report.push_str(&format!(
259 "- **{}** @ {} elements: {:.1}% efficiency ({:.1} GFLOP/s)\n - {}\n\n",
260 b.workload,
261 b.size,
262 b.efficiency * 100.0,
263 b.gflops,
264 b.recommendation
265 ));
266 }
267 }
268
269 if !self.moderate.is_empty() {
270 report.push_str("## Moderate Bottlenecks (< 75% efficiency)\n\n");
271 for b in &self.moderate {
272 report.push_str(&format!(
273 "- **{}** @ {} elements: {:.1}% efficiency ({:.1} GFLOP/s)\n - {}\n\n",
274 b.workload,
275 b.size,
276 b.efficiency * 100.0,
277 b.gflops,
278 b.recommendation
279 ));
280 }
281 }
282
283 if !self.unstable.is_empty() {
284 report.push_str("## Unstable Operations (CV > 15%)\n\n");
285 for b in &self.unstable {
286 report.push_str(&format!(
287 "- **{}** @ {} elements: {}\n\n",
288 b.workload, b.size, b.recommendation
289 ));
290 }
291 }
292
293 if self.critical.is_empty()
294 && self.severe.is_empty()
295 && self.moderate.is_empty()
296 && self.unstable.is_empty()
297 {
298 report.push_str(
299 "**All operations performing at >= 75% efficiency with stable measurements.**\n",
300 );
301 }
302
303 report
304 }
305}