oxirs_vec/gpu/
performance.rs

1//! GPU performance monitoring and statistics
2
3use std::time::{Duration, Instant};
4
/// Running totals describing GPU compute work and memory transfers.
///
/// Every field starts at zero (via [`Default`]) and is only ever changed by
/// the `record_*`, `update_memory_usage` and `reset` methods below, or by
/// callers writing to the public fields directly.
#[derive(Debug, Default, Clone)]
pub struct GpuPerformanceStats {
    /// Number of compute operations recorded so far.
    pub total_operations: u64,
    /// Sum of the durations of all recorded compute operations.
    pub total_compute_time: Duration,
    /// Number of memory transfers recorded so far.
    pub total_memory_transfers: u64,
    /// Sum of the durations of all recorded memory transfers.
    pub total_transfer_time: Duration,
    /// Largest value passed to `update_memory_usage` since creation or the last reset.
    pub peak_memory_usage: usize,
    /// Most recent value passed to `update_memory_usage`.
    pub current_memory_usage: usize,
}

impl GpuPerformanceStats {
    /// Creates an empty statistics record (all counters zero).
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one compute operation that took `duration`.
    pub fn record_compute_operation(&mut self, duration: Duration) {
        self.total_operations += 1;
        self.total_compute_time += duration;
    }

    /// Records one memory transfer that took `duration`.
    pub fn record_memory_transfer(&mut self, duration: Duration) {
        self.total_memory_transfers += 1;
        self.total_transfer_time += duration;
    }

    /// Stores `current_usage` as the current memory usage and raises the peak
    /// if the new value exceeds it.
    pub fn update_memory_usage(&mut self, current_usage: usize) {
        self.current_memory_usage = current_usage;
        self.peak_memory_usage = self.peak_memory_usage.max(current_usage);
    }

    /// Returns the mean duration of a compute operation, or [`Duration::ZERO`]
    /// when no operation has been recorded.
    pub fn average_compute_time(&self) -> Duration {
        Self::mean_duration(self.total_compute_time, self.total_operations)
    }

    /// Returns the mean duration of a memory transfer, or [`Duration::ZERO`]
    /// when no transfer has been recorded.
    pub fn average_transfer_time(&self) -> Duration {
        Self::mean_duration(self.total_transfer_time, self.total_memory_transfers)
    }

    /// Returns compute operations per second of accumulated compute time,
    /// or `0.0` when no compute time has been accumulated.
    pub fn compute_throughput(&self) -> f64 {
        let compute_secs = self.total_compute_time.as_secs_f64();
        if compute_secs > 0.0 {
            self.total_operations as f64 / compute_secs
        } else {
            0.0
        }
    }

    /// Returns `total_bytes_transferred` divided by the accumulated transfer
    /// time in seconds (bytes per second), or `0.0` when no transfer time has
    /// been accumulated.
    ///
    /// The byte count is supplied by the caller; this struct does not track it.
    pub fn memory_bandwidth(&self, total_bytes_transferred: usize) -> f64 {
        let transfer_secs = self.total_transfer_time.as_secs_f64();
        if transfer_secs > 0.0 {
            total_bytes_transferred as f64 / transfer_secs
        } else {
            0.0
        }
    }

    /// Resets every counter back to its default (zero) value.
    pub fn reset(&mut self) {
        *self = Self::default();
    }

    /// Returns the fraction of recorded time spent computing:
    /// `compute / (compute + transfer)`, or `0.0` when nothing was recorded.
    pub fn efficiency_ratio(&self) -> f64 {
        let total_time = self.total_compute_time + self.total_transfer_time;
        let total_secs = total_time.as_secs_f64();
        if total_secs > 0.0 {
            self.total_compute_time.as_secs_f64() / total_secs
        } else {
            0.0
        }
    }

    /// Returns `current_memory_usage / total_memory`, or `0.0` when
    /// `total_memory` is zero.
    ///
    /// The result is not clamped, so it exceeds `1.0` if the current usage is
    /// larger than `total_memory`.
    pub fn memory_utilization(&self, total_memory: usize) -> f64 {
        if total_memory > 0 {
            self.current_memory_usage as f64 / total_memory as f64
        } else {
            0.0
        }
    }

    /// Divides `total` by `count`, rounding down to whole nanoseconds.
    ///
    /// The division is done on the u128 nanosecond count so the full `u64`
    /// range of `count` is honoured. Casting `count` to `u32` would silently
    /// truncate it, producing a wrong average or a division by zero once the
    /// counter reaches 2^32.
    fn mean_duration(total: Duration, count: u64) -> Duration {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        if count == 0 {
            return Duration::ZERO;
        }
        let mean_nanos = total.as_nanos() / u128::from(count);
        // The mean never exceeds `total`, so the seconds part fits in u64;
        // the remainder is below one billion, so it fits in u32.
        Duration::new(
            (mean_nanos / NANOS_PER_SEC) as u64,
            (mean_nanos % NANOS_PER_SEC) as u32,
        )
    }
}
101
102/// Performance timer for GPU operations
103#[derive(Debug)]
104pub struct GpuTimer {
105    start: Instant,
106    operation_type: String,
107}
108
109impl GpuTimer {
110    pub fn start(operation_type: &str) -> Self {
111        Self {
112            start: Instant::now(),
113            operation_type: operation_type.to_string(),
114        }
115    }
116
117    pub fn stop(&self) -> Duration {
118        self.start.elapsed()
119    }
120
121    pub fn stop_and_record(&self, stats: &mut GpuPerformanceStats) -> Duration {
122        let duration = self.stop();
123        if self.operation_type.contains("transfer") {
124            stats.record_memory_transfer(duration);
125        } else {
126            stats.record_compute_operation(duration);
127        }
128        duration
129    }
130}
131
132/// Benchmarking utilities for GPU operations
133pub struct GpuBenchmark;
134
135impl GpuBenchmark {
136    /// Benchmark a closure multiple times and return statistics
137    pub fn benchmark<F>(name: &str, iterations: usize, mut operation: F) -> BenchmarkResult
138    where
139        F: FnMut() -> anyhow::Result<()>,
140    {
141        let mut times = Vec::with_capacity(iterations);
142        let mut errors = 0;
143
144        for _ in 0..iterations {
145            let start = Instant::now();
146            match operation() {
147                Ok(_) => times.push(start.elapsed()),
148                Err(_) => errors += 1,
149            }
150        }
151
152        let total_time: Duration = times.iter().sum();
153        let avg_time = if !times.is_empty() {
154            total_time / times.len() as u32
155        } else {
156            Duration::ZERO
157        };
158
159        let min_time = times.iter().min().copied().unwrap_or(Duration::ZERO);
160        let max_time = times.iter().max().copied().unwrap_or(Duration::ZERO);
161
162        // Calculate standard deviation
163        let avg_secs = avg_time.as_secs_f64();
164        let variance: f64 = times
165            .iter()
166            .map(|t| {
167                let diff = t.as_secs_f64() - avg_secs;
168                diff * diff
169            })
170            .sum::<f64>()
171            / times.len() as f64;
172        let std_dev = Duration::from_secs_f64(variance.sqrt());
173
174        BenchmarkResult {
175            name: name.to_string(),
176            iterations,
177            successful_iterations: times.len(),
178            errors,
179            total_time,
180            average_time: avg_time,
181            min_time,
182            max_time,
183            std_deviation: std_dev,
184        }
185    }
186}
187
/// Summary of one benchmark run.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Label identifying the benchmark.
    pub name: String,
    /// Number of iterations requested.
    pub iterations: usize,
    /// Number of iterations counted as successful.
    pub successful_iterations: usize,
    /// Number of iterations counted as failed.
    pub errors: usize,
    /// Total measured time.
    pub total_time: Duration,
    /// Mean measured time per iteration.
    pub average_time: Duration,
    /// Shortest measured iteration.
    pub min_time: Duration,
    /// Longest measured iteration.
    pub max_time: Duration,
    /// Standard deviation of the measured iteration times.
    pub std_deviation: Duration,
}

impl BenchmarkResult {
    /// Returns successful iterations per second of `total_time`, or `0.0`
    /// when `total_time` is zero.
    pub fn throughput(&self) -> f64 {
        match self.total_time.as_secs_f64() {
            elapsed_secs if elapsed_secs > 0.0 => {
                self.successful_iterations as f64 / elapsed_secs
            }
            _ => 0.0,
        }
    }

    /// Returns the fraction of iterations that succeeded (`0.0..=1.0` for
    /// consistent data), or `0.0` when `iterations` is zero.
    pub fn success_rate(&self) -> f64 {
        match self.iterations {
            0 => 0.0,
            requested => self.successful_iterations as f64 / requested as f64,
        }
    }

    /// Writes a human-readable report of this result to standard output.
    pub fn print(&self) {
        let Self {
            name,
            iterations,
            successful_iterations,
            errors,
            total_time,
            average_time,
            min_time,
            max_time,
            std_deviation,
        } = self;

        println!("Benchmark: {}", name);
        println!(
            "  Iterations: {} (success: {}, errors: {})",
            iterations, successful_iterations, errors
        );
        println!("  Total time: {:?}", total_time);
        println!("  Average time: {:?}", average_time);
        println!("  Min/Max time: {:?} / {:?}", min_time, max_time);
        println!("  Std deviation: {:?}", std_deviation);
        println!("  Throughput: {:.2} ops/sec", self.throughput());
        println!("  Success rate: {:.2}%", self.success_rate() * 100.0);
    }
}
235}