oxirs_vec/
gpu_benchmarks.rs

1//! Comprehensive GPU vs CPU benchmarking for vector operations
2//!
3//! This module provides detailed benchmarks comparing GPU and CPU performance
4//! across all supported distance metrics, different vector dimensions, and dataset sizes.
5
6use crate::gpu::{GpuConfig, GpuVectorIndex};
7use crate::similarity::SimilarityMetric;
8use crate::Vector;
9use anyhow::Result;
10use scirs2_core::random;
11use scirs2_core::Rng;
12use std::time::{Duration, Instant};
13
14/// Benchmark configuration
15#[derive(Debug, Clone)]
16pub struct GpuBenchmarkConfig {
17    /// Number of vectors in the database
18    pub database_size: usize,
19    /// Number of query vectors
20    pub query_count: usize,
21    /// Vector dimensions to test
22    pub dimensions: Vec<usize>,
23    /// Distance metrics to benchmark
24    pub metrics: Vec<SimilarityMetric>,
25    /// Number of warmup iterations
26    pub warmup_iterations: usize,
27    /// Number of measurement iterations
28    pub measurement_iterations: usize,
29    /// Enable CPU baseline comparison
30    pub compare_cpu: bool,
31    /// Enable GPU acceleration
32    pub enable_gpu: bool,
33}
34
35impl Default for GpuBenchmarkConfig {
36    fn default() -> Self {
37        Self {
38            database_size: 10_000,
39            query_count: 100,
40            dimensions: vec![128, 256, 512, 768, 1024],
41            metrics: vec![
42                SimilarityMetric::Cosine,
43                SimilarityMetric::Euclidean,
44                SimilarityMetric::Manhattan,
45                SimilarityMetric::Pearson,
46                SimilarityMetric::Jaccard,
47                SimilarityMetric::Angular,
48            ],
49            warmup_iterations: 3,
50            measurement_iterations: 10,
51            compare_cpu: true,
52            enable_gpu: true,
53        }
54    }
55}
56
57/// Benchmark results for a single metric and dimension
58#[derive(Debug, Clone)]
59pub struct BenchmarkResult {
60    pub metric: SimilarityMetric,
61    pub dimension: usize,
62    pub database_size: usize,
63    pub query_count: usize,
64    pub cpu_time_ms: Option<f64>,
65    pub gpu_time_ms: Option<f64>,
66    pub speedup: Option<f64>,
67    pub throughput_qps: f64,
68    pub memory_usage_mb: f64,
69}
70
71impl BenchmarkResult {
72    /// Calculate speedup factor (GPU vs CPU)
73    fn calculate_speedup(&mut self) {
74        if let (Some(cpu_time), Some(gpu_time)) = (self.cpu_time_ms, self.gpu_time_ms) {
75            if gpu_time > 0.0 {
76                self.speedup = Some(cpu_time / gpu_time);
77            }
78        }
79    }
80
81    /// Calculate queries per second
82    fn calculate_throughput(&mut self) {
83        let time_ms = self.gpu_time_ms.or(self.cpu_time_ms).unwrap_or(1.0);
84        if time_ms > 0.0 {
85            self.throughput_qps = (self.query_count as f64 / time_ms) * 1000.0;
86        }
87    }
88}
89
/// Comprehensive GPU benchmark suite
///
/// Bundles the benchmark configuration with the results collected so far.
pub struct GpuBenchmarkSuite {
    /// Settings that select which benchmarks run and how they are measured.
    config: GpuBenchmarkConfig,
    /// Collected results, one entry per benchmarked dimension/metric pair.
    results: Vec<BenchmarkResult>,
}
95
96impl GpuBenchmarkSuite {
97    /// Create a new benchmark suite
98    pub fn new(config: GpuBenchmarkConfig) -> Self {
99        Self {
100            config,
101            results: Vec::new(),
102        }
103    }
104
105    /// Run all benchmarks
106    pub fn run(&mut self) -> Result<&[BenchmarkResult]> {
107        tracing::info!(
108            "Starting GPU benchmark suite with {} metrics, {} dimensions",
109            self.config.metrics.len(),
110            self.config.dimensions.len()
111        );
112
113        for &dim in &self.config.dimensions {
114            for metric in &self.config.metrics {
115                tracing::info!(
116                    "Benchmarking {} metric with dimension {}",
117                    format!("{:?}", metric),
118                    dim
119                );
120
121                let result = self.benchmark_metric(*metric, dim)?;
122                self.results.push(result);
123            }
124        }
125
126        Ok(&self.results)
127    }
128
129    /// Benchmark a single metric and dimension
130    fn benchmark_metric(&self, metric: SimilarityMetric, dim: usize) -> Result<BenchmarkResult> {
131        // Generate test data
132        let (database, queries) = self.generate_test_data(dim)?;
133
134        let mut result = BenchmarkResult {
135            metric,
136            dimension: dim,
137            database_size: self.config.database_size,
138            query_count: self.config.query_count,
139            cpu_time_ms: None,
140            gpu_time_ms: None,
141            speedup: None,
142            throughput_qps: 0.0,
143            memory_usage_mb: self.estimate_memory_usage(dim),
144        };
145
146        // CPU baseline
147        if self.config.compare_cpu {
148            result.cpu_time_ms = Some(self.benchmark_cpu(&database, &queries, metric)?);
149        }
150
151        // GPU benchmark
152        if self.config.enable_gpu {
153            match self.benchmark_gpu(&database, &queries, metric, dim) {
154                Ok(time) => result.gpu_time_ms = Some(time),
155                Err(e) => {
156                    tracing::warn!("GPU benchmark failed: {}, falling back to CPU-only", e);
157                }
158            }
159        }
160
161        result.calculate_speedup();
162        result.calculate_throughput();
163
164        Ok(result)
165    }
166
167    /// Generate synthetic test data
168    fn generate_test_data(&self, dim: usize) -> Result<(Vec<Vector>, Vec<Vector>)> {
169        let mut rng = random::rng();
170
171        let mut database = Vec::with_capacity(self.config.database_size);
172        for _i in 0..self.config.database_size {
173            let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
174            database.push(Vector::new(values));
175        }
176
177        let mut queries = Vec::with_capacity(self.config.query_count);
178        for _i in 0..self.config.query_count {
179            let values: Vec<f32> = (0..dim).map(|_| rng.random_range(0.0..1.0)).collect();
180            queries.push(Vector::new(values));
181        }
182
183        Ok((database, queries))
184    }
185
186    /// Benchmark CPU implementation
187    fn benchmark_cpu(
188        &self,
189        database: &[Vector],
190        queries: &[Vector],
191        metric: SimilarityMetric,
192    ) -> Result<f64> {
193        // Warmup
194        for _ in 0..self.config.warmup_iterations {
195            for query in queries.iter().take(5) {
196                for db_vec in database.iter().take(100) {
197                    let _ = metric.compute(query, db_vec)?;
198                }
199            }
200        }
201
202        // Measurement
203        let mut total_time = Duration::ZERO;
204        for _ in 0..self.config.measurement_iterations {
205            let start = Instant::now();
206            for query in queries {
207                for db_vec in database {
208                    let _ = metric.compute(query, db_vec)?;
209                }
210            }
211            total_time += start.elapsed();
212        }
213
214        let avg_time_ms =
215            total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
216        Ok(avg_time_ms)
217    }
218
219    /// Benchmark GPU implementation
220    fn benchmark_gpu(
221        &self,
222        database: &[Vector],
223        queries: &[Vector],
224        metric: SimilarityMetric,
225        _dim: usize,
226    ) -> Result<f64> {
227        let gpu_config = GpuConfig {
228            device_id: 0,
229            enable_tensor_cores: true,
230            enable_mixed_precision: true,
231            memory_pool_size: 1 << 30, // 1GB
232            stream_count: 4,
233            ..Default::default()
234        };
235
236        let mut gpu_index = GpuVectorIndex::new(gpu_config)?;
237        gpu_index.add_vectors(database.to_vec())?;
238
239        // Warmup
240        for _ in 0..self.config.warmup_iterations {
241            for query in queries.iter().take(5) {
242                let _ = gpu_index.search(query, 10, metric)?;
243            }
244        }
245
246        // Measurement
247        let mut total_time = Duration::ZERO;
248        for _ in 0..self.config.measurement_iterations {
249            let start = Instant::now();
250            for query in queries {
251                let _ = gpu_index.search(query, 10, metric)?;
252            }
253            total_time += start.elapsed();
254        }
255
256        let avg_time_ms =
257            total_time.as_secs_f64() * 1000.0 / self.config.measurement_iterations as f64;
258        Ok(avg_time_ms)
259    }
260
261    /// Estimate memory usage
262    fn estimate_memory_usage(&self, dim: usize) -> f64 {
263        let vector_size_bytes = dim * std::mem::size_of::<f32>();
264        let total_vectors = self.config.database_size + self.config.query_count;
265        let total_bytes = total_vectors * vector_size_bytes;
266        total_bytes as f64 / (1024.0 * 1024.0) // Convert to MB
267    }
268
269    /// Generate benchmark report
270    pub fn generate_report(&self) -> String {
271        let mut report = String::new();
272        report.push_str("=== GPU Benchmark Report ===\n\n");
273
274        report.push_str(&format!(
275            "Configuration:\n  Database size: {}\n  Query count: {}\n  Dimensions tested: {:?}\n\n",
276            self.config.database_size, self.config.query_count, self.config.dimensions
277        ));
278
279        report.push_str("Results:\n");
280        report.push_str(&format!(
281            "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12}\n",
282            "Metric", "Dimension", "CPU (ms)", "GPU (ms)", "Speedup", "QPS"
283        ));
284        report.push_str(&"-".repeat(90));
285        report.push('\n');
286
287        for result in &self.results {
288            let cpu_time = result
289                .cpu_time_ms
290                .map(|t| format!("{:.2}", t))
291                .unwrap_or_else(|| "N/A".to_string());
292            let gpu_time = result
293                .gpu_time_ms
294                .map(|t| format!("{:.2}", t))
295                .unwrap_or_else(|| "N/A".to_string());
296            let speedup = result
297                .speedup
298                .map(|s| format!("{:.2}x", s))
299                .unwrap_or_else(|| "N/A".to_string());
300
301            report.push_str(&format!(
302                "{:<20} {:<10} {:<12} {:<12} {:<10} {:<12.0}\n",
303                format!("{:?}", result.metric),
304                result.dimension,
305                cpu_time,
306                gpu_time,
307                speedup,
308                result.throughput_qps
309            ));
310        }
311
312        report.push('\n');
313        self.add_summary_statistics(&mut report);
314
315        report
316    }
317
318    /// Add summary statistics to report
319    fn add_summary_statistics(&self, report: &mut String) {
320        if self.results.is_empty() {
321            return;
322        }
323
324        report.push_str("Summary Statistics:\n");
325
326        // Calculate average speedup
327        let speedups: Vec<f64> = self.results.iter().filter_map(|r| r.speedup).collect();
328
329        if !speedups.is_empty() {
330            let avg_speedup: f64 = speedups.iter().sum::<f64>() / speedups.len() as f64;
331            let max_speedup = speedups
332                .iter()
333                .copied()
334                .max_by(|a, b| a.partial_cmp(b).unwrap())
335                .unwrap();
336
337            report.push_str(&format!("  Average speedup: {:.2}x\n", avg_speedup));
338            report.push_str(&format!("  Maximum speedup: {:.2}x\n", max_speedup));
339        }
340
341        // Calculate total throughput
342        let total_qps: f64 = self.results.iter().map(|r| r.throughput_qps).sum();
343        report.push_str(&format!(
344            "  Total throughput: {:.0} queries/sec\n",
345            total_qps / self.results.len() as f64
346        ));
347
348        // Memory usage
349        let total_memory: f64 = self.results.iter().map(|r| r.memory_usage_mb).sum();
350        report.push_str(&format!(
351            "  Estimated memory: {:.2} MB\n",
352            total_memory / self.results.len() as f64
353        ));
354    }
355
356    /// Export results to JSON
357    pub fn export_json(&self) -> Result<String> {
358        #[derive(serde::Serialize)]
359        struct JsonResult {
360            metric: String,
361            dimension: usize,
362            database_size: usize,
363            query_count: usize,
364            cpu_time_ms: Option<f64>,
365            gpu_time_ms: Option<f64>,
366            speedup: Option<f64>,
367            throughput_qps: f64,
368            memory_usage_mb: f64,
369        }
370
371        let json_results: Vec<JsonResult> = self
372            .results
373            .iter()
374            .map(|r| JsonResult {
375                metric: format!("{:?}", r.metric),
376                dimension: r.dimension,
377                database_size: r.database_size,
378                query_count: r.query_count,
379                cpu_time_ms: r.cpu_time_ms,
380                gpu_time_ms: r.gpu_time_ms,
381                speedup: r.speedup,
382                throughput_qps: r.throughput_qps,
383                memory_usage_mb: r.memory_usage_mb,
384            })
385            .collect();
386
387        Ok(serde_json::to_string_pretty(&json_results)?)
388    }
389
390    /// Get benchmark results
391    pub fn results(&self) -> &[BenchmarkResult] {
392        &self.results
393    }
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration carries the expected sizes and non-empty
    /// dimension and metric lists.
    #[test]
    fn test_benchmark_config_default() {
        let defaults = GpuBenchmarkConfig::default();

        assert_eq!(defaults.database_size, 10_000);
        assert_eq!(defaults.query_count, 100);
        assert!(!defaults.dimensions.is_empty());
        assert!(!defaults.metrics.is_empty());
    }

    /// The memory estimate for the default configuration is positive.
    #[test]
    fn test_memory_estimation() {
        let suite = GpuBenchmarkSuite::new(GpuBenchmarkConfig::default());

        let estimated_mb = suite.estimate_memory_usage(256);

        assert!(estimated_mb > 0.0);
    }

    /// Speedup and throughput are derived from the stored timings.
    #[test]
    fn test_benchmark_result_calculation() {
        let mut measured = BenchmarkResult {
            metric: SimilarityMetric::Cosine,
            dimension: 128,
            database_size: 1000,
            query_count: 100,
            cpu_time_ms: Some(100.0),
            gpu_time_ms: Some(10.0),
            speedup: None,
            throughput_qps: 0.0,
            memory_usage_mb: 10.0,
        };

        measured.calculate_speedup();
        assert_eq!(measured.speedup, Some(10.0));

        measured.calculate_throughput();
        assert!(measured.throughput_qps > 0.0);
    }

    /// Generated data honours the configured database and query counts.
    #[test]
    fn test_generate_test_data() {
        let suite = GpuBenchmarkSuite::new(GpuBenchmarkConfig {
            database_size: 100,
            query_count: 10,
            ..Default::default()
        });

        let (database, queries) = suite
            .generate_test_data(128)
            .expect("test data generation should succeed");

        assert_eq!(database.len(), 100);
        assert_eq!(queries.len(), 10);
    }
}