// cbtop/backend_regression/types.rs

/// Execution backend that a measurement, comparison or recommendation refers to.
///
/// Variants fall into three groups: the `Scalar` reference, SIMD instruction
/// sets (see [`Backend::is_simd`]) and GPU APIs (see [`Backend::is_gpu`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Backend {
    /// Scalar backend; [`Backend::theoretical_speedup`] treats it as the 1.0 reference.
    Scalar,
    /// SSE2 (classified as SIMD).
    Sse2,
    /// AVX2 (classified as SIMD).
    Avx2,
    /// AVX-512 (classified as SIMD).
    Avx512,
    /// NEON (classified as SIMD).
    Neon,
    /// CUDA (classified as GPU).
    Cuda,
    /// Metal (classified as GPU).
    Metal,
    /// Vulkan (classified as GPU).
    Vulkan,
    /// WebGPU (classified as GPU).
    WebGpu,
}

impl Backend {
    /// Returns the display name of this backend, e.g. `"AVX-512"` for
    /// [`Backend::Avx512`].
    pub fn name(&self) -> &'static str {
        match self {
            Self::Scalar => "Scalar",
            Self::Sse2 => "SSE2",
            Self::Avx2 => "AVX2",
            Self::Avx512 => "AVX-512",
            Self::Neon => "NEON",
            Self::Cuda => "CUDA",
            Self::Metal => "Metal",
            Self::Vulkan => "Vulkan",
            Self::WebGpu => "WebGPU",
        }
    }

    /// Returns `true` for the GPU variants: CUDA, Metal, Vulkan and WebGPU.
    pub fn is_gpu(&self) -> bool {
        matches!(self, Self::Cuda | Self::Metal | Self::Vulkan | Self::WebGpu)
    }

    /// Returns `true` for the SIMD variants: SSE2, AVX2, AVX-512 and NEON.
    ///
    /// `Scalar` is neither SIMD nor GPU.
    pub fn is_simd(&self) -> bool {
        matches!(self, Self::Sse2 | Self::Avx2 | Self::Avx512 | Self::Neon)
    }

    /// Returns a fixed nominal speedup factor for this backend, with `Scalar`
    /// at `1.0`.
    ///
    /// The values are hard-coded constants, not measurements.
    /// NOTE(review): the SIMD figures (4/8/16) look like 32-bit lane counts per
    /// vector register and the GPU figures like rough estimates; confirm the
    /// intended basis.
    pub fn theoretical_speedup(&self) -> f64 {
        match self {
            Self::Scalar => 1.0,
            Self::Sse2 => 4.0,
            Self::Avx2 => 8.0,
            Self::Avx512 => 16.0,
            Self::Neon => 4.0,
            Self::Cuda => 100.0,
            Self::Metal => 50.0,
            Self::Vulkan => 50.0,
            Self::WebGpu => 30.0,
        }
    }
}
67
/// Kind of workload a measurement, comparison or recommendation refers to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WorkloadType {
    /// Displayed as `"GEMM"`.
    Gemm,
    /// Displayed as `"Conv2D"`.
    Conv2d,
    /// Displayed as `"Elementwise"`.
    Elementwise,
    /// Displayed as `"Reduction"`.
    Reduction,
    /// Displayed as `"Attention"`.
    Attention,
    /// Displayed as `"Bandwidth"`.
    Bandwidth,
}

impl WorkloadType {
    /// Returns the display name of this workload, e.g. `"Conv2D"` for
    /// [`WorkloadType::Conv2d`].
    pub fn name(&self) -> &'static str {
        match self {
            Self::Gemm => "GEMM",
            Self::Conv2d => "Conv2D",
            Self::Elementwise => "Elementwise",
            Self::Reduction => "Reduction",
            Self::Attention => "Attention",
            Self::Bandwidth => "Bandwidth",
        }
    }
}
98
99#[derive(Debug, Clone)]
101pub struct BackendMeasurement {
102 pub backend: Backend,
104 pub workload: WorkloadType,
106 pub size: usize,
108 pub latency_us: f64,
110 pub throughput: f64,
112 pub efficiency_percent: f64,
114 pub transfer_time_us: Option<f64>,
116 pub compute_time_us: Option<f64>,
118}
119
120impl BackendMeasurement {
121 pub fn new(
123 backend: Backend,
124 workload: WorkloadType,
125 size: usize,
126 latency_us: f64,
127 throughput: f64,
128 ) -> Self {
129 Self {
130 backend,
131 workload,
132 size,
133 latency_us,
134 throughput,
135 efficiency_percent: 0.0,
136 transfer_time_us: None,
137 compute_time_us: None,
138 }
139 }
140
141 pub fn with_efficiency(mut self, efficiency: f64) -> Self {
143 self.efficiency_percent = efficiency;
144 self
145 }
146
147 pub fn with_gpu_timing(mut self, transfer_us: f64, compute_us: f64) -> Self {
149 self.transfer_time_us = Some(transfer_us);
150 self.compute_time_us = Some(compute_us);
151 self
152 }
153
154 pub fn transfer_overhead(&self) -> Option<f64> {
156 match (self.transfer_time_us, self.compute_time_us) {
157 (Some(t), Some(c)) if t + c > 0.0 => Some(t / (t + c)),
158 _ => None,
159 }
160 }
161}
162
163#[derive(Debug, Clone)]
165pub struct BackendComparison {
166 pub baseline: Backend,
168 pub comparison: Backend,
170 pub workload: WorkloadType,
172 pub size: usize,
174 pub efficiency_ratio: f64,
176 pub speedup: f64,
178 pub is_regression: bool,
180 pub threshold: f64,
182}
183
184impl BackendComparison {
185 pub fn summary(&self) -> String {
187 if self.is_regression {
188 format!(
189 "REGRESSION: {} -> {} on {} size={}: {:.1}% slower",
190 self.baseline.name(),
191 self.comparison.name(),
192 self.workload.name(),
193 self.size,
194 (1.0 - self.speedup) * 100.0
195 )
196 } else {
197 format!(
198 "OK: {} -> {} on {} size={}: {:.1}x speedup",
199 self.baseline.name(),
200 self.comparison.name(),
201 self.workload.name(),
202 self.size,
203 self.speedup
204 )
205 }
206 }
207}
208
209#[derive(Debug, Clone)]
211pub struct SizeCliff {
212 pub backend: Backend,
214 pub workload: WorkloadType,
216 pub size_before: usize,
218 pub size_after: usize,
220 pub efficiency_before: f64,
222 pub efficiency_after: f64,
224 pub drop_percent: f64,
226}
227
228impl SizeCliff {
229 pub fn summary(&self) -> String {
231 format!(
232 "CLIFF: {} {} at {}\u{2192}{}: {:.1}% efficiency drop",
233 self.backend.name(),
234 self.workload.name(),
235 self.size_before,
236 self.size_after,
237 self.drop_percent
238 )
239 }
240}
241
242#[derive(Debug, Clone)]
244pub struct BackendRecommendation {
245 pub backend: Backend,
247 pub workload: WorkloadType,
249 pub size: usize,
251 pub expected_efficiency: f64,
253 pub confidence: f64,
255 pub reason: String,
257}