1use crate::gpu::{GpuConfig, GpuVectorIndex};
7use crate::similarity::SimilarityMetric;
8use crate::Vector;
9use anyhow::Result;
10use scirs2_core::random::{self, RngExt};
11use std::time::{Duration, Instant};
12
13#[derive(Debug, Clone)]
15pub struct GpuBenchmarkConfig {
16 pub database_size: usize,
18 pub query_count: usize,
20 pub dimensions: Vec<usize>,
22 pub metrics: Vec<SimilarityMetric>,
24 pub warmup_iterations: usize,
26 pub measurement_iterations: usize,
28 pub compare_cpu: bool,
30 pub enable_gpu: bool,
32}
33
34impl Default for GpuBenchmarkConfig {
35 fn default() -> Self {
36 Self {
37 database_size: 10_000,
38 query_count: 100,
39 dimensions: vec![128, 256, 512, 768, 1024],
40 metrics: vec![
41 SimilarityMetric::Cosine,
42 SimilarityMetric::Euclidean,
43 SimilarityMetric::Manhattan,
44 SimilarityMetric::Pearson,
45 SimilarityMetric::Jaccard,
46 SimilarityMetric::Angular,
47 ],
48 warmup_iterations: 3,
49 measurement_iterations: 10,
50 compare_cpu: true,
51 enable_gpu: true,
52 }
53 }
54}
55
56#[derive(Debug, Clone)]
58pub struct BenchmarkResult {
59 pub metric: SimilarityMetric,
60 pub dimension: usize,
61 pub database_size: usize,
62 pub query_count: usize,
63 pub cpu_time_ms: Option<f64>,
64 pub gpu_time_ms: Option<f64>,
65 pub speedup: Option<f64>,
66 pub throughput_qps: f64,
67 pub memory_usage_mb: f64,
68}
69
70impl BenchmarkResult {
71 fn calculate_speedup(&mut self) {
73 if let (Some(cpu_time), Some(gpu_time)) = (self.cpu_time_ms, self.gpu_time_ms) {
74 if gpu_time > 0.0 {
75 self.speedup = Some(cpu_time / gpu_time);
76 }
77 }
78 }
79
80 fn calculate_throughput(&mut self) {
82 let time_ms = self.gpu_time_ms.or(self.cpu_time_ms).unwrap_or(1.0);
83 if time_ms > 0.0 {
84 self.throughput_qps = (self.query_count as f64 / time_ms) * 1000.0;
85 }
86 }
87}
88
89pub struct GpuBenchmarkSuite {
91 config: GpuBenchmarkConfig,
92 results: Vec<BenchmarkResult>,
93}
94
95impl GpuBenchmarkSuite {
96 pub fn new(config: GpuBenchmarkConfig) -> Self {
98 Self {
99 config,
100 results: Vec::new(),
101 }
102 }
103
104 pub fn run(&mut self) -> Result<&[BenchmarkResult]> {
106 tracing::info!(
107 "Starting GPU benchmark suite with {} metrics, {} dimensions",
108 self.config.metrics.len(),
109 self.config.dimensions.len()
110 );
111
112 for &dim in &self.config.dimensions {
113 for metric in &self.config.metrics {
114 tracing::info!(
115 "Benchmarking {} metric with dimension {}",
116 format!("{:?}", metric),
117 dim
118 );
119
120 let result = self.benchmark_metric(*metric, dim)?;
121 self.results.push(result);
122 }
123 }
124
125 Ok(&self.results)
126 }
127
128 fn benchmark_metric(&self, metric: SimilarityMetric, dim: usize) -> Result<BenchmarkResult> {
130 let (database, queries) = self.generate_test_data(dim)?;
132
133 let mut result = BenchmarkResult {
134 metric,
135 dimension: dim,
136 database_size: self.config.database_size,
137 query_count: self.config.query_count,
138 cpu_time_ms: None,
139 gpu_time_ms: None,
140 speedup: None,
141 throughput_qps: 0.0,
142 memory_usage_mb: self.estimate_memory_usage(dim),
143 };
144
145 if self.config.compare_cpu {
147 result.cpu_time_ms = Some(self.benchmark_cpu(&database, &queries, metric)?);
148 }
149
150 if self.config.enable_gpu {
152 match self.benchmark_gpu(&database, &queries, metric, dim) {
153 Ok(time) => result.gpu_time_ms = Some(time),
154 Err(e) => {
155 tracing::warn!("GPU benchmark failed: {}, falling back to CPU-only", e);
156 }
157 }
158 }
159
160 result.calculate_speedup();
161 result.calculate_throughput();
162
163 Ok(result)
164 }
165
166 fn generate_test_data(&self, dim: usize) -> Result<(Vec<Vector>, Vec<Vector>)> {
168 let mut rng = random::rng();
169
170 let mut database = Vec::with_capacity(self.config.database_size);
171 for _i in 0..self.config.database_size {
172 let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
173 database.push(Vector::new(values));
174 }
175
176 let mut queries = Vec::with_capacity(self.config.query_count);
177 for _i in 0..self.config.query_count {
178 let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
179 queries.push(Vector::new(values));
180 }
181
182 Ok((database, queries))
183 }
184
185 fn benchmark_cpu(
187 &self,
188 database: &[Vector],
189 queries: &[Vector],
190 metric: SimilarityMetric,
191 ) -> Result<f64> {
192 for _ in 0..self.config.warmup_iterations {
194 for query in queries.iter().take(5) {
195 for db_vec in database.iter().take(100) {
196 let _ = metric.compute(query, db_vec)?;
197 }
198 }
199 }
200
201 let mut total_time = Duration::ZERO;
203 for _ in 0..self.config.measurement_iterations {
204 let start = Instant::now();
205 for query in queries {
206 for db_vec in database {
207 let _ = metric.compute(query, db_vec)?;
208 }
209 }
210 total_time += start.elapsed();
211 }
212
213 let avg_time_ms =
214 total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
215 Ok(avg_time_ms)
216 }
217
218 fn benchmark_gpu(
220 &self,
221 database: &[Vector],
222 queries: &[Vector],
223 metric: SimilarityMetric,
224 _dim: usize,
225 ) -> Result<f64> {
226 let gpu_config = GpuConfig {
227 device_id: 0,
228 enable_tensor_cores: true,
229 enable_mixed_precision: true,
230 memory_pool_size: 1 << 30, stream_count: 4,
232 ..Default::default()
233 };
234
235 let mut gpu_index = GpuVectorIndex::new(gpu_config)?;
236 gpu_index.add_vectors(database.to_vec())?;
237
238 for _ in 0..self.config.warmup_iterations {
240 for query in queries.iter().take(5) {
241 let _ = gpu_index.search(query, 10, metric)?;
242 }
243 }
244
245 let mut total_time = Duration::ZERO;
247 for _ in 0..self.config.measurement_iterations {
248 let start = Instant::now();
249 for query in queries {
250 let _ = gpu_index.search(query, 10, metric)?;
251 }
252 total_time += start.elapsed();
253 }
254
255 let avg_time_ms =
256 total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
257 Ok(avg_time_ms)
258 }
259
260 fn estimate_memory_usage(&self, dim: usize) -> f64 {
262 let vector_size_bytes = dim * std::mem::size_of::<f32>();
263 let total_vectors = self.config.database_size + self.config.query_count;
264 let total_bytes = total_vectors * vector_size_bytes;
265 total_bytes as f64 / (1024.0 * 1024.0) }
267
268 pub fn generate_report(&self) -> String {
270 let mut report = String::new();
271 report.push_str("=== GPU Benchmark Report ===\n\n");
272
273 report.push_str(&format!(
274 "Configuration:\n Database size: {}\n Query count: {}\n Dimensions tested: {:?}\n\n",
275 self.config.database_size, self.config.query_count, self.config.dimensions
276 ));
277
278 report.push_str("Results:\n");
279 report.push_str(&format!(
280 "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12}\n",
281 "Metric", "Dimension", "CPU (ms)", "GPU (ms)", "Speedup", "QPS"
282 ));
283 report.push_str(&"-".repeat(90));
284 report.push('\n');
285
286 for result in &self.results {
287 let cpu_time = result
288 .cpu_time_ms
289 .map(|t| format!("{:.2}", t))
290 .unwrap_or_else(|| "N/A".to_string());
291 let gpu_time = result
292 .gpu_time_ms
293 .map(|t| format!("{:.2}", t))
294 .unwrap_or_else(|| "N/A".to_string());
295 let speedup = result
296 .speedup
297 .map(|s| format!("{:.2}x", s))
298 .unwrap_or_else(|| "N/A".to_string());
299
300 report.push_str(&format!(
301 "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12.0}\n",
302 format!("{:?}", result.metric),
303 result.dimension,
304 cpu_time,
305 gpu_time,
306 speedup,
307 result.throughput_qps
308 ));
309 }
310
311 report.push('\n');
312 self.add_summary_statistics(&mut report);
313
314 report
315 }
316
317 fn add_summary_statistics(&self, report: &mut String) {
319 if self.results.is_empty() {
320 return;
321 }
322
323 report.push_str("Summary Statistics:\n");
324
325 let speedups: Vec<f64> = self.results.iter().filter_map(|r| r.speedup).collect();
327
328 if !speedups.is_empty() {
329 let avg_speedup: f64 = speedups.iter().sum::<f64>() / speedups.len() as f64;
330 let max_speedup = speedups
331 .iter()
332 .copied()
333 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
334 .expect("speedups validated to be non-empty");
335
336 report.push_str(&format!(" Average speedup: {:.2}x\n", avg_speedup));
337 report.push_str(&format!(" Maximum speedup: {:.2}x\n", max_speedup));
338 }
339
340 let total_qps: f64 = self.results.iter().map(|r| r.throughput_qps).sum();
342 report.push_str(&format!(
343 " Total throughput: {:.0} queries/sec\n",
344 total_qps / self.results.len() as f64
345 ));
346
347 let total_memory: f64 = self.results.iter().map(|r| r.memory_usage_mb).sum();
349 report.push_str(&format!(
350 " Estimated memory: {:.2} MB\n",
351 total_memory / self.results.len() as f64
352 ));
353 }
354
355 pub fn export_json(&self) -> Result<String> {
357 #[derive(serde::Serialize)]
358 struct JsonResult {
359 metric: String,
360 dimension: usize,
361 database_size: usize,
362 query_count: usize,
363 cpu_time_ms: Option<f64>,
364 gpu_time_ms: Option<f64>,
365 speedup: Option<f64>,
366 throughput_qps: f64,
367 memory_usage_mb: f64,
368 }
369
370 let json_results: Vec<JsonResult> = self
371 .results
372 .iter()
373 .map(|r| JsonResult {
374 metric: format!("{:?}", r.metric),
375 dimension: r.dimension,
376 database_size: r.database_size,
377 query_count: r.query_count,
378 cpu_time_ms: r.cpu_time_ms,
379 gpu_time_ms: r.gpu_time_ms,
380 speedup: r.speedup,
381 throughput_qps: r.throughput_qps,
382 memory_usage_mb: r.memory_usage_mb,
383 })
384 .collect();
385
386 Ok(serde_json::to_string_pretty(&json_results)?)
387 }
388
389 pub fn results(&self) -> &[BenchmarkResult] {
391 &self.results
392 }
393}
394
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration carries the documented sizes and non-empty workloads.
    #[test]
    fn test_benchmark_config_default() {
        let defaults = GpuBenchmarkConfig::default();

        assert_eq!(defaults.database_size, 10_000);
        assert_eq!(defaults.query_count, 100);
        assert!(!defaults.dimensions.is_empty());
        assert!(!defaults.metrics.is_empty());
    }

    /// The memory estimate for a non-trivial workload is strictly positive.
    #[test]
    fn test_memory_estimation() {
        let suite = GpuBenchmarkSuite::new(GpuBenchmarkConfig::default());

        assert!(suite.estimate_memory_usage(256) > 0.0);
    }

    /// Speedup and throughput are derived from the recorded timings.
    #[test]
    fn test_benchmark_result_calculation() {
        let mut measured = BenchmarkResult {
            metric: SimilarityMetric::Cosine,
            dimension: 128,
            database_size: 1000,
            query_count: 100,
            cpu_time_ms: Some(100.0),
            gpu_time_ms: Some(10.0),
            speedup: None,
            throughput_qps: 0.0,
            memory_usage_mb: 10.0,
        };

        measured.calculate_speedup();
        assert_eq!(measured.speedup, Some(10.0));

        measured.calculate_throughput();
        assert!(measured.throughput_qps > 0.0);
    }

    /// Generated data has exactly the configured number of database and query vectors.
    #[test]
    fn test_generate_test_data() -> Result<()> {
        let suite = GpuBenchmarkSuite::new(GpuBenchmarkConfig {
            database_size: 100,
            query_count: 10,
            ..Default::default()
        });

        let generated = suite.generate_test_data(128);
        assert!(generated.is_ok());

        let (database, queries) = generated?;
        assert_eq!(database.len(), 100);
        assert_eq!(queries.len(), 10);
        Ok(())
    }
}