1use crate::gpu::{GpuConfig, GpuVectorIndex};
7use crate::similarity::SimilarityMetric;
8use crate::Vector;
9use anyhow::Result;
10use scirs2_core::random;
11use scirs2_core::Rng;
12use std::time::{Duration, Instant};
13
/// Configuration for a [`GpuBenchmarkSuite`] run.
#[derive(Debug, Clone)]
pub struct GpuBenchmarkConfig {
    /// Number of random vectors generated for the database side of each benchmark.
    pub database_size: usize,
    /// Number of random query vectors generated for each benchmark.
    pub query_count: usize,
    /// Vector dimensions to benchmark; every dimension is combined with every metric.
    pub dimensions: Vec<usize>,
    /// Similarity metrics to benchmark; every metric is combined with every dimension.
    pub metrics: Vec<SimilarityMetric>,
    /// Number of untimed warm-up passes executed before measuring.
    pub warmup_iterations: usize,
    /// Number of timed passes; the reported time is the average over these passes.
    pub measurement_iterations: usize,
    /// When `true`, the CPU benchmark is run and `cpu_time_ms` is recorded.
    pub compare_cpu: bool,
    /// When `true`, the GPU benchmark is attempted and `gpu_time_ms` is recorded on success.
    pub enable_gpu: bool,
}
34
35impl Default for GpuBenchmarkConfig {
36 fn default() -> Self {
37 Self {
38 database_size: 10_000,
39 query_count: 100,
40 dimensions: vec![128, 256, 512, 768, 1024],
41 metrics: vec![
42 SimilarityMetric::Cosine,
43 SimilarityMetric::Euclidean,
44 SimilarityMetric::Manhattan,
45 SimilarityMetric::Pearson,
46 SimilarityMetric::Jaccard,
47 SimilarityMetric::Angular,
48 ],
49 warmup_iterations: 3,
50 measurement_iterations: 10,
51 compare_cpu: true,
52 enable_gpu: true,
53 }
54 }
55}
56
/// Outcome of benchmarking one similarity metric at one vector dimension.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Similarity metric that was benchmarked.
    pub metric: SimilarityMetric,
    /// Dimension of the generated vectors.
    pub dimension: usize,
    /// Number of database vectors used.
    pub database_size: usize,
    /// Number of query vectors used.
    pub query_count: usize,
    /// Average CPU time per measured pass in milliseconds; `None` if the CPU path was not run.
    pub cpu_time_ms: Option<f64>,
    /// Average GPU time per measured pass in milliseconds; `None` if the GPU path was
    /// disabled or failed.
    pub gpu_time_ms: Option<f64>,
    /// `cpu_time_ms / gpu_time_ms`, set only when both timings exist and the GPU time is positive.
    pub speedup: Option<f64>,
    /// Queries per second, derived from the GPU time when available, otherwise the CPU time.
    pub throughput_qps: f64,
    /// Estimated size in MiB of the generated database and query vectors.
    pub memory_usage_mb: f64,
}
70
71impl BenchmarkResult {
72 fn calculate_speedup(&mut self) {
74 if let (Some(cpu_time), Some(gpu_time)) = (self.cpu_time_ms, self.gpu_time_ms) {
75 if gpu_time > 0.0 {
76 self.speedup = Some(cpu_time / gpu_time);
77 }
78 }
79 }
80
81 fn calculate_throughput(&mut self) {
83 let time_ms = self.gpu_time_ms.or(self.cpu_time_ms).unwrap_or(1.0);
84 if time_ms > 0.0 {
85 self.throughput_qps = (self.query_count as f64 / time_ms) * 1000.0;
86 }
87 }
88}
89
/// Runs CPU and GPU similarity benchmarks and collects their results.
pub struct GpuBenchmarkSuite {
    /// Settings that control data sizes, metrics, dimensions and iteration counts.
    config: GpuBenchmarkConfig,
    /// Results gathered so far, one entry per benchmarked (dimension, metric) pair.
    results: Vec<BenchmarkResult>,
}
95
96impl GpuBenchmarkSuite {
97 pub fn new(config: GpuBenchmarkConfig) -> Self {
99 Self {
100 config,
101 results: Vec::new(),
102 }
103 }
104
105 pub fn run(&mut self) -> Result<&[BenchmarkResult]> {
107 tracing::info!(
108 "Starting GPU benchmark suite with {} metrics, {} dimensions",
109 self.config.metrics.len(),
110 self.config.dimensions.len()
111 );
112
113 for &dim in &self.config.dimensions {
114 for metric in &self.config.metrics {
115 tracing::info!(
116 "Benchmarking {} metric with dimension {}",
117 format!("{:?}", metric),
118 dim
119 );
120
121 let result = self.benchmark_metric(*metric, dim)?;
122 self.results.push(result);
123 }
124 }
125
126 Ok(&self.results)
127 }
128
129 fn benchmark_metric(&self, metric: SimilarityMetric, dim: usize) -> Result<BenchmarkResult> {
131 let (database, queries) = self.generate_test_data(dim)?;
133
134 let mut result = BenchmarkResult {
135 metric,
136 dimension: dim,
137 database_size: self.config.database_size,
138 query_count: self.config.query_count,
139 cpu_time_ms: None,
140 gpu_time_ms: None,
141 speedup: None,
142 throughput_qps: 0.0,
143 memory_usage_mb: self.estimate_memory_usage(dim),
144 };
145
146 if self.config.compare_cpu {
148 result.cpu_time_ms = Some(self.benchmark_cpu(&database, &queries, metric)?);
149 }
150
151 if self.config.enable_gpu {
153 match self.benchmark_gpu(&database, &queries, metric, dim) {
154 Ok(time) => result.gpu_time_ms = Some(time),
155 Err(e) => {
156 tracing::warn!("GPU benchmark failed: {}, falling back to CPU-only", e);
157 }
158 }
159 }
160
161 result.calculate_speedup();
162 result.calculate_throughput();
163
164 Ok(result)
165 }
166
167 fn generate_test_data(&self, dim: usize) -> Result<(Vec<Vector>, Vec<Vector>)> {
169 let mut rng = random::rng();
170
171 let mut database = Vec::with_capacity(self.config.database_size);
172 for _i in 0..self.config.database_size {
173 let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
174 database.push(Vector::new(values));
175 }
176
177 let mut queries = Vec::with_capacity(self.config.query_count);
178 for _i in 0..self.config.query_count {
179 let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
180 queries.push(Vector::new(values));
181 }
182
183 Ok((database, queries))
184 }
185
186 fn benchmark_cpu(
188 &self,
189 database: &[Vector],
190 queries: &[Vector],
191 metric: SimilarityMetric,
192 ) -> Result<f64> {
193 for _ in 0..self.config.warmup_iterations {
195 for query in queries.iter().take(5) {
196 for db_vec in database.iter().take(100) {
197 let _ = metric.compute(query, db_vec)?;
198 }
199 }
200 }
201
202 let mut total_time = Duration::ZERO;
204 for _ in 0..self.config.measurement_iterations {
205 let start = Instant::now();
206 for query in queries {
207 for db_vec in database {
208 let _ = metric.compute(query, db_vec)?;
209 }
210 }
211 total_time += start.elapsed();
212 }
213
214 let avg_time_ms =
215 total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
216 Ok(avg_time_ms)
217 }
218
219 fn benchmark_gpu(
221 &self,
222 database: &[Vector],
223 queries: &[Vector],
224 metric: SimilarityMetric,
225 _dim: usize,
226 ) -> Result<f64> {
227 let gpu_config = GpuConfig {
228 device_id: 0,
229 enable_tensor_cores: true,
230 enable_mixed_precision: true,
231 memory_pool_size: 1 << 30, stream_count: 4,
233 ..Default::default()
234 };
235
236 let mut gpu_index = GpuVectorIndex::new(gpu_config)?;
237 gpu_index.add_vectors(database.to_vec())?;
238
239 for _ in 0..self.config.warmup_iterations {
241 for query in queries.iter().take(5) {
242 let _ = gpu_index.search(query, 10, metric)?;
243 }
244 }
245
246 let mut total_time = Duration::ZERO;
248 for _ in 0..self.config.measurement_iterations {
249 let start = Instant::now();
250 for query in queries {
251 let _ = gpu_index.search(query, 10, metric)?;
252 }
253 total_time += start.elapsed();
254 }
255
256 let avg_time_ms =
257 total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
258 Ok(avg_time_ms)
259 }
260
261 fn estimate_memory_usage(&self, dim: usize) -> f64 {
263 let vector_size_bytes = dim * std::mem::size_of::<f32>();
264 let total_vectors = self.config.database_size + self.config.query_count;
265 let total_bytes = total_vectors * vector_size_bytes;
266 total_bytes as f64 / (1024.0 * 1024.0) }
268
269 pub fn generate_report(&self) -> String {
271 let mut report = String::new();
272 report.push_str("=== GPU Benchmark Report ===\n\n");
273
274 report.push_str(&format!(
275 "Configuration:\n Database size: {}\n Query count: {}\n Dimensions tested: {:?}\n\n",
276 self.config.database_size, self.config.query_count, self.config.dimensions
277 ));
278
279 report.push_str("Results:\n");
280 report.push_str(&format!(
281 "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12}\n",
282 "Metric", "Dimension", "CPU (ms)", "GPU (ms)", "Speedup", "QPS"
283 ));
284 report.push_str(&"-".repeat(90));
285 report.push('\n');
286
287 for result in &self.results {
288 let cpu_time = result
289 .cpu_time_ms
290 .map(|t| format!("{:.2}", t))
291 .unwrap_or_else(|| "N/A".to_string());
292 let gpu_time = result
293 .gpu_time_ms
294 .map(|t| format!("{:.2}", t))
295 .unwrap_or_else(|| "N/A".to_string());
296 let speedup = result
297 .speedup
298 .map(|s| format!("{:.2}x", s))
299 .unwrap_or_else(|| "N/A".to_string());
300
301 report.push_str(&format!(
302 "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12.0}\n",
303 format!("{:?}", result.metric),
304 result.dimension,
305 cpu_time,
306 gpu_time,
307 speedup,
308 result.throughput_qps
309 ));
310 }
311
312 report.push('\n');
313 self.add_summary_statistics(&mut report);
314
315 report
316 }
317
318 fn add_summary_statistics(&self, report: &mut String) {
320 if self.results.is_empty() {
321 return;
322 }
323
324 report.push_str("Summary Statistics:\n");
325
326 let speedups: Vec<f64> = self.results.iter().filter_map(|r| r.speedup).collect();
328
329 if !speedups.is_empty() {
330 let avg_speedup: f64 = speedups.iter().sum::<f64>() / speedups.len() as f64;
331 let max_speedup = speedups
332 .iter()
333 .copied()
334 .max_by(|a, b| a.partial_cmp(b).unwrap())
335 .unwrap();
336
337 report.push_str(&format!(" Average speedup: {:.2}x\n", avg_speedup));
338 report.push_str(&format!(" Maximum speedup: {:.2}x\n", max_speedup));
339 }
340
341 let total_qps: f64 = self.results.iter().map(|r| r.throughput_qps).sum();
343 report.push_str(&format!(
344 " Total throughput: {:.0} queries/sec\n",
345 total_qps / self.results.len() as f64
346 ));
347
348 let total_memory: f64 = self.results.iter().map(|r| r.memory_usage_mb).sum();
350 report.push_str(&format!(
351 " Estimated memory: {:.2} MB\n",
352 total_memory / self.results.len() as f64
353 ));
354 }
355
356 pub fn export_json(&self) -> Result<String> {
358 #[derive(serde::Serialize)]
359 struct JsonResult {
360 metric: String,
361 dimension: usize,
362 database_size: usize,
363 query_count: usize,
364 cpu_time_ms: Option<f64>,
365 gpu_time_ms: Option<f64>,
366 speedup: Option<f64>,
367 throughput_qps: f64,
368 memory_usage_mb: f64,
369 }
370
371 let json_results: Vec<JsonResult> = self
372 .results
373 .iter()
374 .map(|r| JsonResult {
375 metric: format!("{:?}", r.metric),
376 dimension: r.dimension,
377 database_size: r.database_size,
378 query_count: r.query_count,
379 cpu_time_ms: r.cpu_time_ms,
380 gpu_time_ms: r.gpu_time_ms,
381 speedup: r.speedup,
382 throughput_qps: r.throughput_qps,
383 memory_usage_mb: r.memory_usage_mb,
384 })
385 .collect();
386
387 Ok(serde_json::to_string_pretty(&json_results)?)
388 }
389
390 pub fn results(&self) -> &[BenchmarkResult] {
392 &self.results
393 }
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration describes a non-trivial benchmark.
    #[test]
    fn test_benchmark_config_default() {
        let config = GpuBenchmarkConfig::default();
        assert_eq!(config.database_size, 10_000);
        assert_eq!(config.query_count, 100);
        assert!(!config.dimensions.is_empty());
        assert!(!config.metrics.is_empty());
    }

    /// The memory estimate is the exact `f32` payload size expressed in MiB.
    #[test]
    fn test_memory_estimation() {
        let config = GpuBenchmarkConfig {
            database_size: 1000,
            query_count: 24,
            ..Default::default()
        };
        let suite = GpuBenchmarkSuite::new(config);

        // (1000 + 24) vectors * 256 dims * 4 bytes = 1_048_576 bytes = 1 MiB.
        let memory_mb = suite.estimate_memory_usage(256);
        assert!((memory_mb - 1.0).abs() < 1e-9);
    }

    /// Speedup and throughput are derived from the recorded timings.
    #[test]
    fn test_benchmark_result_calculation() {
        let mut result = BenchmarkResult {
            metric: SimilarityMetric::Cosine,
            dimension: 128,
            database_size: 1000,
            query_count: 100,
            cpu_time_ms: Some(100.0),
            gpu_time_ms: Some(10.0),
            speedup: None,
            throughput_qps: 0.0,
            memory_usage_mb: 10.0,
        };

        result.calculate_speedup();
        assert_eq!(result.speedup, Some(10.0));

        // The GPU time takes precedence: 100 queries in 10 ms is 10_000 QPS.
        result.calculate_throughput();
        assert!((result.throughput_qps - 10_000.0).abs() < 1e-9);
    }

    /// Generated data has the configured number of database and query vectors.
    #[test]
    fn test_generate_test_data() {
        let config = GpuBenchmarkConfig {
            database_size: 100,
            query_count: 10,
            ..Default::default()
        };

        let suite = GpuBenchmarkSuite::new(config);
        let result = suite.generate_test_data(128);
        assert!(result.is_ok());

        let (database, queries) = result.unwrap();
        assert_eq!(database.len(), 100);
        assert_eq!(queries.len(), 10);
    }
}