Skip to main content

cbtop/optimize/
bottleneck.rs

1//! Bottleneck analysis for performance identification (OPT-002).
2
3use serde::{Deserialize, Serialize};
4
5use super::suite::{BaselineEntry, BaselineReport, OptimizationSuite, WorkloadConfig};
6
7/// Entry describing a performance bottleneck
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct BottleneckEntry {
10    /// Workload name
11    pub workload: String,
12    /// Problem size where bottleneck occurs
13    pub size: usize,
14    /// Achieved efficiency (0.0 - 1.0)
15    pub efficiency: f64,
16    /// Achieved GFLOP/s
17    pub gflops: f64,
18    /// Recommendation for improvement
19    pub recommendation: String,
20    /// Severity level
21    pub severity: BottleneckSeverity,
22}
23
24/// Severity level of a bottleneck
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26pub enum BottleneckSeverity {
27    /// Critical: < 25% efficiency
28    Critical,
29    /// Severe: < 50% efficiency
30    Severe,
31    /// Moderate: < 75% efficiency
32    Moderate,
33    /// Unstable: High CV (> 15%)
34    Unstable,
35}
36
37/// Results of bottleneck analysis
38#[derive(Debug, Clone, Default, Serialize, Deserialize)]
39pub struct BottleneckAnalysis {
40    /// Critical bottlenecks (< 25% efficiency)
41    pub critical: Vec<BottleneckEntry>,
42    /// Severe bottlenecks (< 50% efficiency)
43    pub severe: Vec<BottleneckEntry>,
44    /// Moderate bottlenecks (< 75% efficiency)
45    pub moderate: Vec<BottleneckEntry>,
46    /// Unstable operations (CV > 15%)
47    pub unstable: Vec<BottleneckEntry>,
48    /// Summary statistics
49    pub summary: AnalysisSummary,
50}
51
52/// Summary statistics for the analysis
53#[derive(Debug, Clone, Default, Serialize, Deserialize)]
54pub struct AnalysisSummary {
55    /// Total configurations analyzed
56    pub total_configs: usize,
57    /// Number of critical bottlenecks
58    pub critical_count: usize,
59    /// Number of severe bottlenecks
60    pub severe_count: usize,
61    /// Number of moderate bottlenecks
62    pub moderate_count: usize,
63    /// Number of unstable operations
64    pub unstable_count: usize,
65    /// Average efficiency across all configs
66    pub avg_efficiency: f64,
67    /// Worst efficiency found
68    pub min_efficiency: f64,
69    /// Best efficiency found
70    pub max_efficiency: f64,
71}
72
73impl OptimizationSuite {
74    /// Analyze baseline for bottlenecks
75    pub fn analyze_bottlenecks(&self, baseline: &BaselineReport) -> BottleneckAnalysis {
76        let mut analysis = BottleneckAnalysis::default();
77        let mut efficiencies = Vec::new();
78
79        for entry in &baseline.entries {
80            let workload = self.workloads.iter().find(|w| w.name == entry.workload);
81
82            let efficiency = entry.efficiency;
83            efficiencies.push(efficiency);
84
85            // Check efficiency thresholds
86            if efficiency < 0.25 {
87                analysis.critical.push(BottleneckEntry {
88                    workload: entry.workload.clone(),
89                    size: entry.size,
90                    efficiency,
91                    gflops: entry.gflops,
92                    recommendation: Self::recommend_optimization(
93                        workload,
94                        entry,
95                        BottleneckSeverity::Critical,
96                    ),
97                    severity: BottleneckSeverity::Critical,
98                });
99            } else if efficiency < 0.50 {
100                analysis.severe.push(BottleneckEntry {
101                    workload: entry.workload.clone(),
102                    size: entry.size,
103                    efficiency,
104                    gflops: entry.gflops,
105                    recommendation: Self::recommend_optimization(
106                        workload,
107                        entry,
108                        BottleneckSeverity::Severe,
109                    ),
110                    severity: BottleneckSeverity::Severe,
111                });
112            } else if efficiency < 0.75 {
113                analysis.moderate.push(BottleneckEntry {
114                    workload: entry.workload.clone(),
115                    size: entry.size,
116                    efficiency,
117                    gflops: entry.gflops,
118                    recommendation: Self::recommend_optimization(
119                        workload,
120                        entry,
121                        BottleneckSeverity::Moderate,
122                    ),
123                    severity: BottleneckSeverity::Moderate,
124                });
125            }
126
127            // Check stability (CV > 15%)
128            if entry.cv_percent > 15.0 {
129                analysis.unstable.push(BottleneckEntry {
130                    workload: entry.workload.clone(),
131                    size: entry.size,
132                    efficiency,
133                    gflops: entry.gflops,
134                    recommendation: format!(
135                        "High variance (CV={:.1}%) - check CPU governor with PERF-003 pattern, \
136                         or reduce system load during benchmarks",
137                        entry.cv_percent
138                    ),
139                    severity: BottleneckSeverity::Unstable,
140                });
141            }
142        }
143
144        // Calculate summary
145        analysis.summary = AnalysisSummary {
146            total_configs: baseline.entries.len(),
147            critical_count: analysis.critical.len(),
148            severe_count: analysis.severe.len(),
149            moderate_count: analysis.moderate.len(),
150            unstable_count: analysis.unstable.len(),
151            avg_efficiency: if efficiencies.is_empty() {
152                0.0
153            } else {
154                efficiencies.iter().sum::<f64>() / efficiencies.len() as f64
155            },
156            min_efficiency: efficiencies.iter().cloned().fold(f64::INFINITY, f64::min),
157            max_efficiency: efficiencies
158                .iter()
159                .cloned()
160                .fold(f64::NEG_INFINITY, f64::max),
161        };
162
163        analysis
164    }
165
166    fn recommend_optimization(
167        workload: Option<&WorkloadConfig>,
168        entry: &BaselineEntry,
169        severity: BottleneckSeverity,
170    ) -> String {
171        let is_memory_bound = workload.map(|w| w.memory_bound).unwrap_or(false);
172        let is_large = entry.size > 1_000_000;
173        let is_very_large = entry.size > 4_000_000;
174
175        match severity {
176            BottleneckSeverity::Critical => {
177                if entry.gflops < 1.0 {
178                    "Critical: Near-zero throughput - verify SIMD codegen with `cargo asm`, \
179                     check for scalar fallback"
180                        .to_string()
181                } else if is_memory_bound && is_very_large {
182                    "Critical: Memory bandwidth limited at large size - implement cache-aware \
183                     tiling (PERF-001 pattern), consider prefetching"
184                        .to_string()
185                } else {
186                    "Critical: Profile with `perf record` or `renacer` to identify hotspot, \
187                     check for branch mispredictions"
188                        .to_string()
189                }
190            }
191            BottleneckSeverity::Severe => {
192                if is_memory_bound && is_large {
193                    "Consider cache-aware tiling (PERF-001 pattern) for large memory-bound \
194                     operations"
195                        .to_string()
196                } else if entry.cv_percent > 10.0 {
197                    format!(
198                        "High variance (CV={:.1}%) - set CPU governor to 'performance' \
199                         (PERF-003 pattern)",
200                        entry.cv_percent
201                    )
202                } else {
203                    "Profile with `perf stat` to check IPC and cache misses".to_string()
204                }
205            }
206            BottleneckSeverity::Moderate => {
207                if is_memory_bound {
208                    "Consider memory access pattern optimization (coalescing, prefetching)"
209                        .to_string()
210                } else {
211                    "Near optimal - minor gains possible with micro-optimizations".to_string()
212                }
213            }
214            BottleneckSeverity::Unstable => {
215                "Reduce measurement variance before optimizing".to_string()
216            }
217        }
218    }
219}
220
221impl BottleneckAnalysis {
222    /// Format analysis as human-readable report
223    pub fn format_report(&self) -> String {
224        let mut report = String::new();
225
226        report.push_str("# Bottleneck Analysis Report\n\n");
227        report.push_str(&format!(
228            "**Configurations Analyzed**: {}\n",
229            self.summary.total_configs
230        ));
231        report.push_str(&format!(
232            "**Average Efficiency**: {:.1}%\n",
233            self.summary.avg_efficiency * 100.0
234        ));
235        report.push_str(&format!(
236            "**Efficiency Range**: {:.1}% - {:.1}%\n\n",
237            self.summary.min_efficiency * 100.0,
238            self.summary.max_efficiency * 100.0
239        ));
240
241        if !self.critical.is_empty() {
242            report.push_str("## Critical Bottlenecks (< 25% efficiency)\n\n");
243            for b in &self.critical {
244                report.push_str(&format!(
245                    "- **{}** @ {} elements: {:.1}% efficiency ({:.1} GFLOP/s)\n  - {}\n\n",
246                    b.workload,
247                    b.size,
248                    b.efficiency * 100.0,
249                    b.gflops,
250                    b.recommendation
251                ));
252            }
253        }
254
255        if !self.severe.is_empty() {
256            report.push_str("## Severe Bottlenecks (< 50% efficiency)\n\n");
257            for b in &self.severe {
258                report.push_str(&format!(
259                    "- **{}** @ {} elements: {:.1}% efficiency ({:.1} GFLOP/s)\n  - {}\n\n",
260                    b.workload,
261                    b.size,
262                    b.efficiency * 100.0,
263                    b.gflops,
264                    b.recommendation
265                ));
266            }
267        }
268
269        if !self.moderate.is_empty() {
270            report.push_str("## Moderate Bottlenecks (< 75% efficiency)\n\n");
271            for b in &self.moderate {
272                report.push_str(&format!(
273                    "- **{}** @ {} elements: {:.1}% efficiency ({:.1} GFLOP/s)\n  - {}\n\n",
274                    b.workload,
275                    b.size,
276                    b.efficiency * 100.0,
277                    b.gflops,
278                    b.recommendation
279                ));
280            }
281        }
282
283        if !self.unstable.is_empty() {
284            report.push_str("## Unstable Operations (CV > 15%)\n\n");
285            for b in &self.unstable {
286                report.push_str(&format!(
287                    "- **{}** @ {} elements: {}\n\n",
288                    b.workload, b.size, b.recommendation
289                ));
290            }
291        }
292
293        if self.critical.is_empty()
294            && self.severe.is_empty()
295            && self.moderate.is_empty()
296            && self.unstable.is_empty()
297        {
298            report.push_str(
299                "**All operations performing at >= 75% efficiency with stable measurements.**\n",
300            );
301        }
302
303        report
304    }
305}