oxirs_vec/gpu/
performance.rs1use std::time::{Duration, Instant};
4
/// Running totals describing GPU compute and memory-transfer activity.
///
/// The struct only accumulates what callers report through the `record_*`
/// and `update_memory_usage` methods; it performs no measurement itself.
#[derive(Debug, Default, Clone)]
pub struct GpuPerformanceStats {
    /// Number of compute operations recorded so far.
    pub total_operations: u64,
    /// Sum of the durations of all recorded compute operations.
    pub total_compute_time: Duration,
    /// Number of memory transfers recorded so far.
    pub total_memory_transfers: u64,
    /// Sum of the durations of all recorded memory transfers.
    pub total_transfer_time: Duration,
    /// Largest value ever passed to [`GpuPerformanceStats::update_memory_usage`].
    pub peak_memory_usage: usize,
    /// Most recent value passed to [`GpuPerformanceStats::update_memory_usage`].
    pub current_memory_usage: usize,
}

impl GpuPerformanceStats {
    /// Creates a statistics record with every counter set to zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Counts one compute operation and adds `duration` to the compute total.
    pub fn record_compute_operation(&mut self, duration: Duration) {
        self.total_operations += 1;
        self.total_compute_time += duration;
    }

    /// Counts one memory transfer and adds `duration` to the transfer total.
    pub fn record_memory_transfer(&mut self, duration: Duration) {
        self.total_memory_transfers += 1;
        self.total_transfer_time += duration;
    }

    /// Stores `current_usage` as the current memory usage and raises the
    /// recorded peak if the new value exceeds it.
    pub fn update_memory_usage(&mut self, current_usage: usize) {
        self.current_memory_usage = current_usage;
        self.peak_memory_usage = self.peak_memory_usage.max(current_usage);
    }

    /// Mean duration of a recorded compute operation.
    ///
    /// Returns [`Duration::ZERO`] when no operation has been recorded. The
    /// result is rounded down to a whole nanosecond.
    pub fn average_compute_time(&self) -> Duration {
        Self::mean_duration(self.total_compute_time, self.total_operations)
    }

    /// Mean duration of a recorded memory transfer.
    ///
    /// Returns [`Duration::ZERO`] when no transfer has been recorded. The
    /// result is rounded down to a whole nanosecond.
    pub fn average_transfer_time(&self) -> Duration {
        Self::mean_duration(self.total_transfer_time, self.total_memory_transfers)
    }

    /// Compute operations per second of accumulated compute time.
    ///
    /// Returns `0.0` when no compute time has been accumulated.
    pub fn compute_throughput(&self) -> f64 {
        let seconds = self.total_compute_time.as_secs_f64();
        if seconds > 0.0 {
            self.total_operations as f64 / seconds
        } else {
            0.0
        }
    }

    /// `total_bytes_transferred` divided by the accumulated transfer time in
    /// seconds.
    ///
    /// The byte count is supplied by the caller because this struct does not
    /// track transfer sizes. Returns `0.0` when no transfer time has been
    /// accumulated.
    pub fn memory_bandwidth(&self, total_bytes_transferred: usize) -> f64 {
        let seconds = self.total_transfer_time.as_secs_f64();
        if seconds > 0.0 {
            total_bytes_transferred as f64 / seconds
        } else {
            0.0
        }
    }

    /// Resets every counter, including the recorded peak, back to zero.
    pub fn reset(&mut self) {
        *self = Self::default();
    }

    /// Fraction of the combined compute + transfer time that was spent on
    /// compute, in the range `0.0..=1.0`.
    ///
    /// Returns `0.0` when no time at all has been recorded.
    pub fn efficiency_ratio(&self) -> f64 {
        let total_seconds = (self.total_compute_time + self.total_transfer_time).as_secs_f64();
        if total_seconds > 0.0 {
            self.total_compute_time.as_secs_f64() / total_seconds
        } else {
            0.0
        }
    }

    /// Current memory usage as a fraction of `total_memory`.
    ///
    /// The value is not clamped, so it exceeds `1.0` if the recorded usage is
    /// larger than `total_memory`. Returns `0.0` when `total_memory` is zero.
    pub fn memory_utilization(&self, total_memory: usize) -> f64 {
        if total_memory > 0 {
            self.current_memory_usage as f64 / total_memory as f64
        } else {
            0.0
        }
    }

    /// Divides `total` by `count`, returning zero for an empty sample.
    ///
    /// The division is done on 128-bit nanoseconds rather than through
    /// `Duration / u32`: narrowing a `u64` count to `u32` silently truncates
    /// large counts and turns exact multiples of 2^32 into a zero divisor,
    /// which panics.
    fn mean_duration(total: Duration, count: u64) -> Duration {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        if count == 0 {
            return Duration::ZERO;
        }
        let mean_nanos = total.as_nanos() / u128::from(count);
        // The mean never exceeds `total`, so its whole seconds fit in `u64`;
        // the remainder is below one billion and therefore fits in `u32`.
        Duration::new(
            (mean_nanos / NANOS_PER_SEC) as u64,
            (mean_nanos % NANOS_PER_SEC) as u32,
        )
    }
}
101
102#[derive(Debug)]
104pub struct GpuTimer {
105 start: Instant,
106 operation_type: String,
107}
108
109impl GpuTimer {
110 pub fn start(operation_type: &str) -> Self {
111 Self {
112 start: Instant::now(),
113 operation_type: operation_type.to_string(),
114 }
115 }
116
117 pub fn stop(&self) -> Duration {
118 self.start.elapsed()
119 }
120
121 pub fn stop_and_record(&self, stats: &mut GpuPerformanceStats) -> Duration {
122 let duration = self.stop();
123 if self.operation_type.contains("transfer") {
124 stats.record_memory_transfer(duration);
125 } else {
126 stats.record_compute_operation(duration);
127 }
128 duration
129 }
130}
131
132pub struct GpuBenchmark;
134
135impl GpuBenchmark {
136 pub fn benchmark<F>(name: &str, iterations: usize, mut operation: F) -> BenchmarkResult
138 where
139 F: FnMut() -> anyhow::Result<()>,
140 {
141 let mut times = Vec::with_capacity(iterations);
142 let mut errors = 0;
143
144 for _ in 0..iterations {
145 let start = Instant::now();
146 match operation() {
147 Ok(_) => times.push(start.elapsed()),
148 Err(_) => errors += 1,
149 }
150 }
151
152 let total_time: Duration = times.iter().sum();
153 let avg_time = if !times.is_empty() {
154 total_time / times.len() as u32
155 } else {
156 Duration::ZERO
157 };
158
159 let min_time = times.iter().min().copied().unwrap_or(Duration::ZERO);
160 let max_time = times.iter().max().copied().unwrap_or(Duration::ZERO);
161
162 let avg_secs = avg_time.as_secs_f64();
164 let variance: f64 = times
165 .iter()
166 .map(|t| {
167 let diff = t.as_secs_f64() - avg_secs;
168 diff * diff
169 })
170 .sum::<f64>()
171 / times.len() as f64;
172 let std_dev = Duration::from_secs_f64(variance.sqrt());
173
174 BenchmarkResult {
175 name: name.to_string(),
176 iterations,
177 successful_iterations: times.len(),
178 errors,
179 total_time,
180 average_time: avg_time,
181 min_time,
182 max_time,
183 std_deviation: std_dev,
184 }
185 }
186}
187
/// Summary of one benchmark run.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Label identifying the benchmark.
    pub name: String,
    /// Number of iterations that were attempted.
    pub iterations: usize,
    /// Number of iterations counted as successful.
    pub successful_iterations: usize,
    /// Number of iterations counted as failed.
    pub errors: usize,
    /// Total time attributed to the run.
    pub total_time: Duration,
    /// Average time per iteration.
    pub average_time: Duration,
    /// Shortest iteration time.
    pub min_time: Duration,
    /// Longest iteration time.
    pub max_time: Duration,
    /// Standard deviation of the iteration times.
    pub std_deviation: Duration,
}

impl BenchmarkResult {
    /// Successful iterations per second of `total_time`.
    ///
    /// Yields `0.0` when `total_time` is zero.
    pub fn throughput(&self) -> f64 {
        let elapsed_secs = self.total_time.as_secs_f64();
        if elapsed_secs > 0.0 {
            return self.successful_iterations as f64 / elapsed_secs;
        }
        0.0
    }

    /// Fraction of attempted iterations that succeeded.
    ///
    /// Yields `0.0` when no iterations were attempted.
    pub fn success_rate(&self) -> f64 {
        match self.iterations {
            0 => 0.0,
            attempted => self.successful_iterations as f64 / attempted as f64,
        }
    }

    /// Writes a human-readable report of this result to standard output.
    pub fn print(&self) {
        let ops_per_sec = self.throughput();
        let success_percent = self.success_rate() * 100.0;

        println!("Benchmark: {}", self.name);
        println!(
            " Iterations: {} (success: {}, errors: {})",
            self.iterations, self.successful_iterations, self.errors
        );
        println!(" Total time: {:?}", self.total_time);
        println!(" Average time: {:?}", self.average_time);
        println!(" Min/Max time: {:?} / {:?}", self.min_time, self.max_time);
        println!(" Std deviation: {:?}", self.std_deviation);
        println!(" Throughput: {:.2} ops/sec", ops_per_sec);
        println!(" Success rate: {:.2}%", success_percent);
    }
}