Skip to main content

oxirs_vec/
gpu_benchmarks.rs

1//! Comprehensive GPU vs CPU benchmarking for vector operations
2//!
3//! This module provides detailed benchmarks comparing GPU and CPU performance
4//! across all supported distance metrics, different vector dimensions, and dataset sizes.
5
6use crate::gpu::{GpuConfig, GpuVectorIndex};
7use crate::similarity::SimilarityMetric;
8use crate::Vector;
9use anyhow::Result;
10use scirs2_core::random::{self, RngExt};
11use std::time::{Duration, Instant};
12
13/// Benchmark configuration
14#[derive(Debug, Clone)]
15pub struct GpuBenchmarkConfig {
16    /// Number of vectors in the database
17    pub database_size: usize,
18    /// Number of query vectors
19    pub query_count: usize,
20    /// Vector dimensions to test
21    pub dimensions: Vec<usize>,
22    /// Distance metrics to benchmark
23    pub metrics: Vec<SimilarityMetric>,
24    /// Number of warmup iterations
25    pub warmup_iterations: usize,
26    /// Number of measurement iterations
27    pub measurement_iterations: usize,
28    /// Enable CPU baseline comparison
29    pub compare_cpu: bool,
30    /// Enable GPU acceleration
31    pub enable_gpu: bool,
32}
33
34impl Default for GpuBenchmarkConfig {
35    fn default() -> Self {
36        Self {
37            database_size: 10_000,
38            query_count: 100,
39            dimensions: vec![128, 256, 512, 768, 1024],
40            metrics: vec![
41                SimilarityMetric::Cosine,
42                SimilarityMetric::Euclidean,
43                SimilarityMetric::Manhattan,
44                SimilarityMetric::Pearson,
45                SimilarityMetric::Jaccard,
46                SimilarityMetric::Angular,
47            ],
48            warmup_iterations: 3,
49            measurement_iterations: 10,
50            compare_cpu: true,
51            enable_gpu: true,
52        }
53    }
54}
55
56/// Benchmark results for a single metric and dimension
57#[derive(Debug, Clone)]
58pub struct BenchmarkResult {
59    pub metric: SimilarityMetric,
60    pub dimension: usize,
61    pub database_size: usize,
62    pub query_count: usize,
63    pub cpu_time_ms: Option<f64>,
64    pub gpu_time_ms: Option<f64>,
65    pub speedup: Option<f64>,
66    pub throughput_qps: f64,
67    pub memory_usage_mb: f64,
68}
69
70impl BenchmarkResult {
71    /// Calculate speedup factor (GPU vs CPU)
72    fn calculate_speedup(&mut self) {
73        if let (Some(cpu_time), Some(gpu_time)) = (self.cpu_time_ms, self.gpu_time_ms) {
74            if gpu_time > 0.0 {
75                self.speedup = Some(cpu_time / gpu_time);
76            }
77        }
78    }
79
80    /// Calculate queries per second
81    fn calculate_throughput(&mut self) {
82        let time_ms = self.gpu_time_ms.or(self.cpu_time_ms).unwrap_or(1.0);
83        if time_ms > 0.0 {
84            self.throughput_qps = (self.query_count as f64 / time_ms) * 1000.0;
85        }
86    }
87}
88
89/// Comprehensive GPU benchmark suite
90pub struct GpuBenchmarkSuite {
91    config: GpuBenchmarkConfig,
92    results: Vec<BenchmarkResult>,
93}
94
95impl GpuBenchmarkSuite {
96    /// Create a new benchmark suite
97    pub fn new(config: GpuBenchmarkConfig) -> Self {
98        Self {
99            config,
100            results: Vec::new(),
101        }
102    }
103
104    /// Run all benchmarks
105    pub fn run(&mut self) -> Result<&[BenchmarkResult]> {
106        tracing::info!(
107            "Starting GPU benchmark suite with {} metrics, {} dimensions",
108            self.config.metrics.len(),
109            self.config.dimensions.len()
110        );
111
112        for &dim in &self.config.dimensions {
113            for metric in &self.config.metrics {
114                tracing::info!(
115                    "Benchmarking {} metric with dimension {}",
116                    format!("{:?}", metric),
117                    dim
118                );
119
120                let result = self.benchmark_metric(*metric, dim)?;
121                self.results.push(result);
122            }
123        }
124
125        Ok(&self.results)
126    }
127
128    /// Benchmark a single metric and dimension
129    fn benchmark_metric(&self, metric: SimilarityMetric, dim: usize) -> Result<BenchmarkResult> {
130        // Generate test data
131        let (database, queries) = self.generate_test_data(dim)?;
132
133        let mut result = BenchmarkResult {
134            metric,
135            dimension: dim,
136            database_size: self.config.database_size,
137            query_count: self.config.query_count,
138            cpu_time_ms: None,
139            gpu_time_ms: None,
140            speedup: None,
141            throughput_qps: 0.0,
142            memory_usage_mb: self.estimate_memory_usage(dim),
143        };
144
145        // CPU baseline
146        if self.config.compare_cpu {
147            result.cpu_time_ms = Some(self.benchmark_cpu(&database, &queries, metric)?);
148        }
149
150        // GPU benchmark
151        if self.config.enable_gpu {
152            match self.benchmark_gpu(&database, &queries, metric, dim) {
153                Ok(time) => result.gpu_time_ms = Some(time),
154                Err(e) => {
155                    tracing::warn!("GPU benchmark failed: {}, falling back to CPU-only", e);
156                }
157            }
158        }
159
160        result.calculate_speedup();
161        result.calculate_throughput();
162
163        Ok(result)
164    }
165
166    /// Generate synthetic test data
167    fn generate_test_data(&self, dim: usize) -> Result<(Vec<Vector>, Vec<Vector>)> {
168        let mut rng = random::rng();
169
170        let mut database = Vec::with_capacity(self.config.database_size);
171        for _i in 0..self.config.database_size {
172            let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
173            database.push(Vector::new(values));
174        }
175
176        let mut queries = Vec::with_capacity(self.config.query_count);
177        for _i in 0..self.config.query_count {
178            let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
179            queries.push(Vector::new(values));
180        }
181
182        Ok((database, queries))
183    }
184
185    /// Benchmark CPU implementation
186    fn benchmark_cpu(
187        &self,
188        database: &[Vector],
189        queries: &[Vector],
190        metric: SimilarityMetric,
191    ) -> Result<f64> {
192        // Warmup
193        for _ in 0..self.config.warmup_iterations {
194            for query in queries.iter().take(5) {
195                for db_vec in database.iter().take(100) {
196                    let _ = metric.compute(query, db_vec)?;
197                }
198            }
199        }
200
201        // Measurement
202        let mut total_time = Duration::ZERO;
203        for _ in 0..self.config.measurement_iterations {
204            let start = Instant::now();
205            for query in queries {
206                for db_vec in database {
207                    let _ = metric.compute(query, db_vec)?;
208                }
209            }
210            total_time += start.elapsed();
211        }
212
213        let avg_time_ms =
214            total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
215        Ok(avg_time_ms)
216    }
217
218    /// Benchmark GPU implementation
219    fn benchmark_gpu(
220        &self,
221        database: &[Vector],
222        queries: &[Vector],
223        metric: SimilarityMetric,
224        _dim: usize,
225    ) -> Result<f64> {
226        let gpu_config = GpuConfig {
227            device_id: 0,
228            enable_tensor_cores: true,
229            enable_mixed_precision: true,
230            memory_pool_size: 1 << 30, // 1GB
231            stream_count: 4,
232            ..Default::default()
233        };
234
235        let mut gpu_index = GpuVectorIndex::new(gpu_config)?;
236        gpu_index.add_vectors(database.to_vec())?;
237
238        // Warmup
239        for _ in 0..self.config.warmup_iterations {
240            for query in queries.iter().take(5) {
241                let _ = gpu_index.search(query, 10, metric)?;
242            }
243        }
244
245        // Measurement
246        let mut total_time = Duration::ZERO;
247        for _ in 0..self.config.measurement_iterations {
248            let start = Instant::now();
249            for query in queries {
250                let _ = gpu_index.search(query, 10, metric)?;
251            }
252            total_time += start.elapsed();
253        }
254
255        let avg_time_ms =
256            total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
257        Ok(avg_time_ms)
258    }
259
260    /// Estimate memory usage
261    fn estimate_memory_usage(&self, dim: usize) -> f64 {
262        let vector_size_bytes = dim * std::mem::size_of::<f32>();
263        let total_vectors = self.config.database_size + self.config.query_count;
264        let total_bytes = total_vectors * vector_size_bytes;
265        total_bytes as f64 / (1024.0 * 1024.0) // Convert to MB
266    }
267
268    /// Generate benchmark report
269    pub fn generate_report(&self) -> String {
270        let mut report = String::new();
271        report.push_str("=== GPU Benchmark Report ===\n\n");
272
273        report.push_str(&format!(
274            "Configuration:\n  Database size: {}\n  Query count: {}\n  Dimensions tested: {:?}\n\n",
275            self.config.database_size, self.config.query_count, self.config.dimensions
276        ));
277
278        report.push_str("Results:\n");
279        report.push_str(&format!(
280            "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12}\n",
281            "Metric", "Dimension", "CPU (ms)", "GPU (ms)", "Speedup", "QPS"
282        ));
283        report.push_str(&"-".repeat(90));
284        report.push('\n');
285
286        for result in &self.results {
287            let cpu_time = result
288                .cpu_time_ms
289                .map(|t| format!("{:.2}", t))
290                .unwrap_or_else(|| "N/A".to_string());
291            let gpu_time = result
292                .gpu_time_ms
293                .map(|t| format!("{:.2}", t))
294                .unwrap_or_else(|| "N/A".to_string());
295            let speedup = result
296                .speedup
297                .map(|s| format!("{:.2}x", s))
298                .unwrap_or_else(|| "N/A".to_string());
299
300            report.push_str(&format!(
301                "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12.0}\n",
302                format!("{:?}", result.metric),
303                result.dimension,
304                cpu_time,
305                gpu_time,
306                speedup,
307                result.throughput_qps
308            ));
309        }
310
311        report.push('\n');
312        self.add_summary_statistics(&mut report);
313
314        report
315    }
316
317    /// Add summary statistics to report
318    fn add_summary_statistics(&self, report: &mut String) {
319        if self.results.is_empty() {
320            return;
321        }
322
323        report.push_str("Summary Statistics:\n");
324
325        // Calculate average speedup
326        let speedups: Vec<f64> = self.results.iter().filter_map(|r| r.speedup).collect();
327
328        if !speedups.is_empty() {
329            let avg_speedup: f64 = speedups.iter().sum::<f64>() / speedups.len() as f64;
330            let max_speedup = speedups
331                .iter()
332                .copied()
333                .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
334                .expect("speedups validated to be non-empty");
335
336            report.push_str(&format!("  Average speedup: {:.2}x\n", avg_speedup));
337            report.push_str(&format!("  Maximum speedup: {:.2}x\n", max_speedup));
338        }
339
340        // Calculate total throughput
341        let total_qps: f64 = self.results.iter().map(|r| r.throughput_qps).sum();
342        report.push_str(&format!(
343            "  Total throughput: {:.0} queries/sec\n",
344            total_qps / self.results.len() as f64
345        ));
346
347        // Memory usage
348        let total_memory: f64 = self.results.iter().map(|r| r.memory_usage_mb).sum();
349        report.push_str(&format!(
350            "  Estimated memory: {:.2} MB\n",
351            total_memory / self.results.len() as f64
352        ));
353    }
354
355    /// Export results to JSON
356    pub fn export_json(&self) -> Result<String> {
357        #[derive(serde::Serialize)]
358        struct JsonResult {
359            metric: String,
360            dimension: usize,
361            database_size: usize,
362            query_count: usize,
363            cpu_time_ms: Option<f64>,
364            gpu_time_ms: Option<f64>,
365            speedup: Option<f64>,
366            throughput_qps: f64,
367            memory_usage_mb: f64,
368        }
369
370        let json_results: Vec<JsonResult> = self
371            .results
372            .iter()
373            .map(|r| JsonResult {
374                metric: format!("{:?}", r.metric),
375                dimension: r.dimension,
376                database_size: r.database_size,
377                query_count: r.query_count,
378                cpu_time_ms: r.cpu_time_ms,
379                gpu_time_ms: r.gpu_time_ms,
380                speedup: r.speedup,
381                throughput_qps: r.throughput_qps,
382                memory_usage_mb: r.memory_usage_mb,
383            })
384            .collect();
385
386        Ok(serde_json::to_string_pretty(&json_results)?)
387    }
388
389    /// Get benchmark results
390    pub fn results(&self) -> &[BenchmarkResult] {
391        &self.results
392    }
393}
394
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration exposes the documented sizes and enables
    /// both the CPU baseline and the GPU pass.
    #[test]
    fn test_benchmark_config_default() {
        let config = GpuBenchmarkConfig::default();
        assert_eq!(config.database_size, 10_000);
        assert_eq!(config.query_count, 100);
        assert!(!config.dimensions.is_empty());
        assert!(!config.metrics.is_empty());
        assert_eq!(config.warmup_iterations, 3);
        assert_eq!(config.measurement_iterations, 10);
        assert!(config.compare_cpu);
        assert!(config.enable_gpu);
    }

    /// The memory estimate equals (database + queries) * dim * 4 bytes,
    /// expressed in units of 1024 * 1024 bytes.
    #[test]
    fn test_memory_estimation() {
        let config = GpuBenchmarkConfig::default();
        let suite = GpuBenchmarkSuite::new(config);
        let memory_mb = suite.estimate_memory_usage(256);

        let expected_mb = ((10_000 + 100) * 256 * 4) as f64 / (1024.0 * 1024.0);
        assert!((memory_mb - expected_mb).abs() < 1e-9);
    }

    /// Speedup is CPU time over GPU time; throughput is derived from the GPU
    /// time when it is present.
    #[test]
    fn test_benchmark_result_calculation() {
        let mut result = BenchmarkResult {
            metric: SimilarityMetric::Cosine,
            dimension: 128,
            database_size: 1000,
            query_count: 100,
            cpu_time_ms: Some(100.0),
            gpu_time_ms: Some(10.0),
            speedup: None,
            throughput_qps: 0.0,
            memory_usage_mb: 10.0,
        };

        result.calculate_speedup();
        assert_eq!(result.speedup, Some(10.0));

        // 100 queries in 10 ms is 10 000 queries per second.
        result.calculate_throughput();
        assert!((result.throughput_qps - 10_000.0).abs() < 1e-9);
    }

    /// Generated data has exactly the configured number of vectors.
    #[test]
    fn test_generate_test_data() -> Result<()> {
        let config = GpuBenchmarkConfig {
            database_size: 100,
            query_count: 10,
            ..Default::default()
        };

        let suite = GpuBenchmarkSuite::new(config);
        let (database, queries) = suite.generate_test_data(128)?;
        assert_eq!(database.len(), 100);
        assert_eq!(queries.len(), 10);
        Ok(())
    }

    /// A suite that has not run yet still produces a report header, no
    /// summary section, and an empty JSON array.
    #[test]
    fn test_empty_suite_outputs() -> Result<()> {
        let suite = GpuBenchmarkSuite::new(GpuBenchmarkConfig::default());
        assert!(suite.results().is_empty());

        let report = suite.generate_report();
        assert!(report.starts_with("=== GPU Benchmark Report ==="));
        assert!(!report.contains("Summary Statistics:"));

        assert_eq!(suite.export_json()?, "[]");
        Ok(())
    }
}