Skip to main content

cbtop/bricks/analyzers/
efficiency.rs

1//! Efficiency analyzer brick (Layer 2)
2//!
3//! Calculates compute efficiency metrics based on theoretical vs actual throughput.
4//! Uses roofline model principles to determine bottleneck type.
5
6use crate::brick::{Brick, BrickAssertion, BrickBudget, BrickVerification};
7use std::any::Any;
8
/// Coarse quality bucket for an efficiency percentage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum EfficiencyClass {
    /// At least 90% efficient.
    Excellent,
    /// At least 70% but below 90%.
    Good,
    /// At least 50% but below 70%.
    Fair,
    /// Above 0% but below 50%.
    Poor,
    /// No usable measurement (zero, negative, or NaN percentage).
    #[default]
    Unknown,
}

impl EfficiencyClass {
    /// Buckets a percentage into an [`EfficiencyClass`].
    ///
    /// Each threshold is inclusive at its lower edge: `90.0` is `Excellent`,
    /// `70.0` is `Good`, `50.0` is `Fair`. Any value that is not strictly
    /// greater than zero (including NaN, for which every comparison is false)
    /// yields `Unknown`.
    pub fn from_percent(pct: f64) -> Self {
        match pct {
            p if p >= 90.0 => Self::Excellent,
            p if p >= 70.0 => Self::Good,
            p if p >= 50.0 => Self::Fair,
            p if p > 0.0 => Self::Poor,
            _ => Self::Unknown,
        }
    }

    /// Human-readable label for this class.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Unknown => "Unknown",
            Self::Poor => "Poor",
            Self::Fair => "Fair",
            Self::Good => "Good",
            Self::Excellent => "Excellent",
        }
    }

    /// Suggested UI color as an `(r, g, b)` triple, running from green for
    /// the best class to red for the worst, with gray for `Unknown`.
    pub fn color_hint(&self) -> (f32, f32, f32) {
        const GREEN: (f32, f32, f32) = (0.3, 1.0, 0.5);
        const YELLOW_GREEN: (f32, f32, f32) = (0.5, 1.0, 0.3);
        const YELLOW: (f32, f32, f32) = (1.0, 0.8, 0.2);
        const RED: (f32, f32, f32) = (1.0, 0.3, 0.2);
        const GRAY: (f32, f32, f32) = (0.5, 0.5, 0.5);

        match *self {
            Self::Excellent => GREEN,
            Self::Good => YELLOW_GREEN,
            Self::Fair => YELLOW,
            Self::Poor => RED,
            Self::Unknown => GRAY,
        }
    }
}
63
/// The limiting factor identified for a workload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BottleneckType {
    /// Limited by arithmetic throughput (FLOPS).
    ComputeBound,
    /// Limited by memory bandwidth.
    MemoryBound,
    /// Limited by latency (serialization or dependencies).
    LatencyBound,
    /// Limited by thermal throttling.
    ThermalBound,
    /// Limited by PCIe transfer overhead.
    PcieBound,
    /// No single dominant bottleneck.
    #[default]
    Balanced,
}

impl BottleneckType {
    /// Human-readable label for this bottleneck.
    pub fn name(&self) -> &'static str {
        self.label_and_advice().0
    }

    /// Short suggestion for how to relieve this bottleneck.
    pub fn recommendation(&self) -> &'static str {
        self.label_and_advice().1
    }

    /// Single lookup table pairing each variant's label with its advice, so
    /// the two stay side by side when a variant is added.
    fn label_and_advice(&self) -> (&'static str, &'static str) {
        match *self {
            Self::ComputeBound => (
                "Compute-Bound",
                "Increase parallelism or use higher FLOPS hardware",
            ),
            Self::MemoryBound => (
                "Memory-Bound",
                "Improve data locality, use tiling, or increase bandwidth",
            ),
            Self::LatencyBound => (
                "Latency-Bound",
                "Reduce dependencies, increase batch size",
            ),
            Self::ThermalBound => (
                "Thermal-Bound",
                "Improve cooling or reduce power target",
            ),
            Self::PcieBound => (
                "PCIe-Bound",
                "Batch transfers, use pinned memory, or compute on device",
            ),
            Self::Balanced => (
                "Balanced",
                "System is well balanced - no single bottleneck",
            ),
        }
    }
}
107
108/// Efficiency metrics
109#[derive(Debug, Clone, Default)]
110pub struct EfficiencyMetrics {
111    /// Compute efficiency (actual FLOPS / peak FLOPS * 100)
112    pub compute_efficiency: f64,
113    /// Memory efficiency (actual bandwidth / peak bandwidth * 100)
114    pub memory_efficiency: f64,
115    /// Overall efficiency (weighted combination)
116    pub overall_efficiency: f64,
117    /// Arithmetic intensity (FLOPS per byte)
118    pub arithmetic_intensity: f64,
119    /// Primary bottleneck
120    pub bottleneck: BottleneckType,
121    /// Efficiency classification
122    pub classification: EfficiencyClass,
123}
124
125impl EfficiencyMetrics {
126    /// Create metrics from raw measurements
127    pub fn calculate(
128        actual_flops: f64,
129        peak_flops: f64,
130        actual_bandwidth: f64,
131        peak_bandwidth: f64,
132        operations: u64,
133        bytes_transferred: u64,
134    ) -> Self {
135        let compute_efficiency = if peak_flops > 0.0 {
136            (actual_flops / peak_flops * 100.0).min(100.0)
137        } else {
138            0.0
139        };
140
141        let memory_efficiency = if peak_bandwidth > 0.0 {
142            (actual_bandwidth / peak_bandwidth * 100.0).min(100.0)
143        } else {
144            0.0
145        };
146
147        let arithmetic_intensity = if bytes_transferred > 0 {
148            operations as f64 / bytes_transferred as f64
149        } else {
150            0.0
151        };
152
153        // Determine bottleneck based on relative efficiencies
154        let bottleneck = if compute_efficiency < 30.0 && memory_efficiency < 30.0 {
155            BottleneckType::LatencyBound
156        } else if compute_efficiency > memory_efficiency + 20.0 {
157            BottleneckType::MemoryBound
158        } else if memory_efficiency > compute_efficiency + 20.0 {
159            BottleneckType::ComputeBound
160        } else {
161            BottleneckType::Balanced
162        };
163
164        // Overall efficiency is weighted average
165        let overall_efficiency = (compute_efficiency + memory_efficiency) / 2.0;
166        let classification = EfficiencyClass::from_percent(overall_efficiency);
167
168        Self {
169            compute_efficiency,
170            memory_efficiency,
171            overall_efficiency,
172            arithmetic_intensity,
173            bottleneck,
174            classification,
175        }
176    }
177}
178
179/// Efficiency analyzer brick
180pub struct EfficiencyAnalyzerBrick {
181    /// Current efficiency metrics
182    pub metrics: EfficiencyMetrics,
183    /// Peak FLOPS for the device (GFLOPS)
184    pub peak_flops: f64,
185    /// Peak memory bandwidth (GB/s)
186    pub peak_bandwidth: f64,
187    /// History of overall efficiency samples
188    pub efficiency_history: Vec<f64>,
189    /// History length limit
190    pub history_limit: usize,
191}
192
193impl EfficiencyAnalyzerBrick {
194    /// Create a new efficiency analyzer with device specs
195    pub fn new(peak_flops: f64, peak_bandwidth: f64) -> Self {
196        Self {
197            metrics: EfficiencyMetrics::default(),
198            peak_flops,
199            peak_bandwidth,
200            efficiency_history: Vec::new(),
201            history_limit: 120,
202        }
203    }
204
205    /// Create with reasonable defaults for a mid-range GPU
206    pub fn with_defaults() -> Self {
207        // Reasonable defaults: ~10 TFLOPS, ~500 GB/s
208        Self::new(10000.0, 500.0)
209    }
210
211    /// Update metrics with new measurements
212    pub fn update(
213        &mut self,
214        actual_flops: f64,
215        actual_bandwidth: f64,
216        operations: u64,
217        bytes_transferred: u64,
218    ) {
219        self.metrics = EfficiencyMetrics::calculate(
220            actual_flops,
221            self.peak_flops,
222            actual_bandwidth,
223            self.peak_bandwidth,
224            operations,
225            bytes_transferred,
226        );
227
228        // Track history
229        self.efficiency_history
230            .push(self.metrics.overall_efficiency);
231        if self.efficiency_history.len() > self.history_limit {
232            self.efficiency_history.remove(0);
233        }
234    }
235
236    /// Set thermal throttling detected
237    pub fn set_thermal_throttling(&mut self, is_throttling: bool) {
238        if is_throttling {
239            self.metrics.bottleneck = BottleneckType::ThermalBound;
240        }
241    }
242
243    /// Set PCIe bottleneck detected
244    pub fn set_pcie_bottleneck(&mut self, is_bottleneck: bool) {
245        if is_bottleneck && self.metrics.bottleneck == BottleneckType::Balanced {
246            self.metrics.bottleneck = BottleneckType::PcieBound;
247        }
248    }
249
250    /// Get average efficiency over history
251    pub fn average_efficiency(&self) -> f64 {
252        if self.efficiency_history.is_empty() {
253            return 0.0;
254        }
255        self.efficiency_history.iter().sum::<f64>() / self.efficiency_history.len() as f64
256    }
257
258    /// Get efficiency trend (positive = improving, negative = degrading)
259    pub fn efficiency_trend(&self) -> f64 {
260        if self.efficiency_history.len() < 10 {
261            return 0.0;
262        }
263        let recent: f64 = self.efficiency_history.iter().rev().take(5).sum::<f64>() / 5.0;
264        let older: f64 = self
265            .efficiency_history
266            .iter()
267            .rev()
268            .skip(5)
269            .take(5)
270            .sum::<f64>()
271            / 5.0;
272        recent - older
273    }
274}
275
276impl Default for EfficiencyAnalyzerBrick {
277    fn default() -> Self {
278        Self::with_defaults()
279    }
280}
281
282impl Brick for EfficiencyAnalyzerBrick {
283    fn brick_name(&self) -> &'static str {
284        "efficiency_analyzer"
285    }
286
287    fn assertions(&self) -> Vec<BrickAssertion> {
288        vec![BrickAssertion::max_latency_ms(4)]
289    }
290
291    fn budget(&self) -> BrickBudget {
292        BrickBudget::FRAME_60FPS
293    }
294
295    fn verify(&self) -> BrickVerification {
296        let mut v = BrickVerification::new();
297        for assertion in self.assertions() {
298            v.check(&assertion);
299        }
300        v
301    }
302
303    fn as_any(&self) -> &dyn Any {
304        self
305    }
306}
307
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixture values: device peaks and a fixed operation/byte count.
    const PEAK_GFLOPS: f64 = 10000.0;
    const PEAK_GBPS: f64 = 500.0;
    const OPS: u64 = 1_000_000;
    const BYTES: u64 = 100_000;

    /// Computes metrics against the shared peaks and op/byte counts.
    fn metrics_for(actual_flops: f64, actual_bandwidth: f64) -> EfficiencyMetrics {
        EfficiencyMetrics::calculate(
            actual_flops,
            PEAK_GFLOPS,
            actual_bandwidth,
            PEAK_GBPS,
            OPS,
            BYTES,
        )
    }

    /// Builds an analyzer configured with the shared peaks.
    fn fresh_analyzer() -> EfficiencyAnalyzerBrick {
        EfficiencyAnalyzerBrick::new(PEAK_GFLOPS, PEAK_GBPS)
    }

    /// True when `actual` is within 0.01 of `expected`.
    fn close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < 0.01
    }

    #[test]
    fn test_efficiency_analyzer_brick_name() {
        let brick = EfficiencyAnalyzerBrick::with_defaults();
        assert_eq!(brick.brick_name(), "efficiency_analyzer");
    }

    #[test]
    fn test_efficiency_class_from_percent() {
        let cases = [
            (95.0, EfficiencyClass::Excellent),
            (80.0, EfficiencyClass::Good),
            (60.0, EfficiencyClass::Fair),
            (30.0, EfficiencyClass::Poor),
            (0.0, EfficiencyClass::Unknown),
        ];
        for (percent, expected) in cases {
            assert_eq!(EfficiencyClass::from_percent(percent), expected);
        }
    }

    #[test]
    fn test_efficiency_metrics_calculate() {
        // Half of peak on both axes, 10 operations per byte.
        let snapshot = metrics_for(5000.0, 250.0);

        assert!(close(snapshot.compute_efficiency, 50.0));
        assert!(close(snapshot.memory_efficiency, 50.0));
        assert!(close(snapshot.overall_efficiency, 50.0));
        assert!(close(snapshot.arithmetic_intensity, 10.0));
        assert_eq!(snapshot.bottleneck, BottleneckType::Balanced);
        assert_eq!(snapshot.classification, EfficiencyClass::Fair);
    }

    #[test]
    fn test_memory_bound_detection() {
        // High compute utilization, low memory utilization.
        let snapshot = metrics_for(9000.0, 100.0);
        assert_eq!(snapshot.bottleneck, BottleneckType::MemoryBound);
    }

    #[test]
    fn test_compute_bound_detection() {
        // Low compute utilization, high memory utilization.
        let snapshot = metrics_for(1000.0, 450.0);
        assert_eq!(snapshot.bottleneck, BottleneckType::ComputeBound);
    }

    #[test]
    fn test_latency_bound_detection() {
        // Very low utilization on both axes.
        let snapshot = metrics_for(100.0, 50.0);
        assert_eq!(snapshot.bottleneck, BottleneckType::LatencyBound);
    }

    #[test]
    fn test_efficiency_history() {
        let mut brick = fresh_analyzer();

        for step in 0..10_i32 {
            brick.update(5000.0 + 100.0 * f64::from(step), 250.0, OPS, BYTES);
        }

        assert_eq!(brick.efficiency_history.len(), 10);
        assert!(brick.average_efficiency() > 0.0);
    }

    #[test]
    fn test_efficiency_trend() {
        let mut brick = fresh_analyzer();

        // Feed steadily improving compute throughput.
        for step in 0..15_i32 {
            brick.update(2000.0 + 500.0 * f64::from(step), 250.0, OPS, BYTES);
        }

        assert!(
            brick.efficiency_trend() > 0.0,
            "Trend should be positive for improving efficiency"
        );
    }

    #[test]
    fn test_history_limit() {
        let mut brick = fresh_analyzer();
        brick.history_limit = 10;

        (0..20).for_each(|_| brick.update(5000.0, 250.0, OPS, BYTES));

        assert_eq!(brick.efficiency_history.len(), 10);
    }

    #[test]
    fn test_bottleneck_recommendations() {
        let every_variant = [
            BottleneckType::ComputeBound,
            BottleneckType::MemoryBound,
            BottleneckType::LatencyBound,
            BottleneckType::ThermalBound,
            BottleneckType::PcieBound,
            BottleneckType::Balanced,
        ];
        for variant in every_variant {
            assert!(!variant.recommendation().is_empty());
        }
    }

    #[test]
    fn test_set_thermal_throttling() {
        let mut brick = EfficiencyAnalyzerBrick::with_defaults();
        brick.update(5000.0, 250.0, OPS, BYTES);

        brick.set_thermal_throttling(true);
        assert_eq!(brick.metrics.bottleneck, BottleneckType::ThermalBound);
    }

    #[test]
    fn test_efficiency_class_color_hints() {
        let (red, green, _) = EfficiencyClass::Excellent.color_hint();
        assert!(green > red, "Excellent should be greenish");

        let (red, green, _) = EfficiencyClass::Poor.color_hint();
        assert!(red > green, "Poor should be reddish");
    }
}