cbtop/backend_regression/
types.rs

1//! Core types for cross-backend regression detection.
2
/// Execution backend that a benchmark measurement was taken on.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Backend {
    /// Scalar code path (no SIMD)
    Scalar,
    /// SSE2 (128-bit SIMD)
    Sse2,
    /// AVX2 (256-bit SIMD)
    Avx2,
    /// AVX-512 (512-bit SIMD)
    Avx512,
    /// NEON (ARM SIMD)
    Neon,
    /// CUDA (NVIDIA GPU)
    Cuda,
    /// Metal (Apple GPU)
    Metal,
    /// Vulkan (cross-platform GPU)
    Vulkan,
    /// WebGPU
    WebGpu,
}

impl Backend {
    /// Returns the display name of this backend, e.g. `"AVX-512"`.
    pub fn name(&self) -> &'static str {
        match *self {
            Backend::Scalar => "Scalar",
            Backend::Sse2 => "SSE2",
            Backend::Avx2 => "AVX2",
            Backend::Avx512 => "AVX-512",
            Backend::Neon => "NEON",
            Backend::Cuda => "CUDA",
            Backend::Metal => "Metal",
            Backend::Vulkan => "Vulkan",
            Backend::WebGpu => "WebGPU",
        }
    }

    /// Returns `true` for the GPU backends (CUDA, Metal, Vulkan, WebGPU).
    pub fn is_gpu(&self) -> bool {
        // Listed exhaustively so that adding a variant forces a decision here.
        match *self {
            Backend::Cuda | Backend::Metal | Backend::Vulkan | Backend::WebGpu => true,
            Backend::Scalar
            | Backend::Sse2
            | Backend::Avx2
            | Backend::Avx512
            | Backend::Neon => false,
        }
    }

    /// Returns `true` for the CPU SIMD backends (SSE2, AVX2, AVX-512, NEON).
    pub fn is_simd(&self) -> bool {
        // Listed exhaustively so that adding a variant forces a decision here.
        match *self {
            Backend::Sse2 | Backend::Avx2 | Backend::Avx512 | Backend::Neon => true,
            Backend::Scalar
            | Backend::Cuda
            | Backend::Metal
            | Backend::Vulkan
            | Backend::WebGpu => false,
        }
    }

    /// Returns the theoretical speedup of this backend relative to scalar code.
    ///
    /// For SIMD backends this is the register width divided by a 32-bit lane.
    /// The GPU figures are placeholders, since real GPU speedup is variable.
    pub fn theoretical_speedup(&self) -> f64 {
        // Width in bits of one lane; SIMD speedup = register bits / lane bits.
        const LANE_BITS: f64 = 32.0;

        match *self {
            Backend::Scalar => 1.0,
            Backend::Sse2 | Backend::Neon => 128.0 / LANE_BITS,
            Backend::Avx2 => 256.0 / LANE_BITS,
            Backend::Avx512 => 512.0 / LANE_BITS,
            // Variable in practice; placeholder values.
            Backend::Cuda => 100.0,
            Backend::Metal | Backend::Vulkan => 50.0,
            Backend::WebGpu => 30.0,
        }
    }
}
67
/// Kind of workload exercised by a benchmark.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WorkloadType {
    /// Matrix multiplication
    Gemm,
    /// 2D convolution
    Conv2d,
    /// Element-wise operations
    Elementwise,
    /// Reduction (sum, mean)
    Reduction,
    /// Attention mechanism
    Attention,
    /// Memory bandwidth test
    Bandwidth,
}

impl WorkloadType {
    /// Returns the display name of this workload, e.g. `"GEMM"`.
    pub fn name(&self) -> &'static str {
        let label = match *self {
            WorkloadType::Gemm => "GEMM",
            WorkloadType::Conv2d => "Conv2D",
            WorkloadType::Elementwise => "Elementwise",
            WorkloadType::Reduction => "Reduction",
            WorkloadType::Attention => "Attention",
            WorkloadType::Bandwidth => "Bandwidth",
        };
        label
    }
}
98
99/// Performance measurement for a single backend/size combination
100#[derive(Debug, Clone)]
101pub struct BackendMeasurement {
102    /// Backend used
103    pub backend: Backend,
104    /// Workload type
105    pub workload: WorkloadType,
106    /// Problem size (elements)
107    pub size: usize,
108    /// Latency in microseconds
109    pub latency_us: f64,
110    /// Throughput (ops/sec or elements/sec)
111    pub throughput: f64,
112    /// Efficiency (% of theoretical peak)
113    pub efficiency_percent: f64,
114    /// GPU transfer time (if applicable)
115    pub transfer_time_us: Option<f64>,
116    /// Compute time (excluding transfer)
117    pub compute_time_us: Option<f64>,
118}
119
120impl BackendMeasurement {
121    /// Create new measurement
122    pub fn new(
123        backend: Backend,
124        workload: WorkloadType,
125        size: usize,
126        latency_us: f64,
127        throughput: f64,
128    ) -> Self {
129        Self {
130            backend,
131            workload,
132            size,
133            latency_us,
134            throughput,
135            efficiency_percent: 0.0,
136            transfer_time_us: None,
137            compute_time_us: None,
138        }
139    }
140
141    /// Set efficiency
142    pub fn with_efficiency(mut self, efficiency: f64) -> Self {
143        self.efficiency_percent = efficiency;
144        self
145    }
146
147    /// Set GPU timing breakdown
148    pub fn with_gpu_timing(mut self, transfer_us: f64, compute_us: f64) -> Self {
149        self.transfer_time_us = Some(transfer_us);
150        self.compute_time_us = Some(compute_us);
151        self
152    }
153
154    /// Get transfer overhead ratio (transfer / total)
155    pub fn transfer_overhead(&self) -> Option<f64> {
156        match (self.transfer_time_us, self.compute_time_us) {
157            (Some(t), Some(c)) if t + c > 0.0 => Some(t / (t + c)),
158            _ => None,
159        }
160    }
161}
162
163/// Comparison result between two backends
164#[derive(Debug, Clone)]
165pub struct BackendComparison {
166    /// Baseline backend
167    pub baseline: Backend,
168    /// Comparison backend
169    pub comparison: Backend,
170    /// Workload type
171    pub workload: WorkloadType,
172    /// Problem size
173    pub size: usize,
174    /// Efficiency ratio (comparison / baseline)
175    pub efficiency_ratio: f64,
176    /// Speedup (baseline_latency / comparison_latency)
177    pub speedup: f64,
178    /// Is this a regression? (efficiency_ratio < 1.0 - threshold)
179    pub is_regression: bool,
180    /// Regression threshold used
181    pub threshold: f64,
182}
183
184impl BackendComparison {
185    /// Get summary message
186    pub fn summary(&self) -> String {
187        if self.is_regression {
188            format!(
189                "REGRESSION: {} -> {} on {} size={}: {:.1}% slower",
190                self.baseline.name(),
191                self.comparison.name(),
192                self.workload.name(),
193                self.size,
194                (1.0 - self.speedup) * 100.0
195            )
196        } else {
197            format!(
198                "OK: {} -> {} on {} size={}: {:.1}x speedup",
199                self.baseline.name(),
200                self.comparison.name(),
201                self.workload.name(),
202                self.size,
203                self.speedup
204            )
205        }
206    }
207}
208
209/// Size cliff detection result
210#[derive(Debug, Clone)]
211pub struct SizeCliff {
212    /// Backend where cliff occurs
213    pub backend: Backend,
214    /// Workload type
215    pub workload: WorkloadType,
216    /// Size before cliff
217    pub size_before: usize,
218    /// Size after cliff
219    pub size_after: usize,
220    /// Efficiency before cliff
221    pub efficiency_before: f64,
222    /// Efficiency after cliff
223    pub efficiency_after: f64,
224    /// Drop percentage
225    pub drop_percent: f64,
226}
227
228impl SizeCliff {
229    /// Get summary message
230    pub fn summary(&self) -> String {
231        format!(
232            "CLIFF: {} {} at {}\u{2192}{}: {:.1}% efficiency drop",
233            self.backend.name(),
234            self.workload.name(),
235            self.size_before,
236            self.size_after,
237            self.drop_percent
238        )
239    }
240}
241
/// Backend recommendation for a given workload and problem size.
///
/// Plain data record: it carries the suggested backend together with the
/// expected efficiency, a confidence value and a free-form reason string.
#[derive(Debug, Clone)]
pub struct BackendRecommendation {
    /// Recommended backend
    pub backend: Backend,
    /// Workload type the recommendation applies to
    pub workload: WorkloadType,
    /// Problem size the recommendation applies to
    pub size: usize,
    /// Expected efficiency
    // NOTE(review): presumably the same "% of theoretical peak" unit as
    // `BackendMeasurement::efficiency_percent` — confirm against the producer.
    pub expected_efficiency: f64,
    /// Confidence (0.0 - 1.0)
    pub confidence: f64,
    /// Reason for recommendation
    pub reason: String,
}
cbtop/backend_regression/types.rs

cbtop/backend_regression/
types.rs