1use std::time::Duration;
4
5#[derive(Debug)]
7pub struct CudaStream {
8 pub handle: *mut std::ffi::c_void,
9 pub device_id: i32,
10}
11
12unsafe impl Send for CudaStream {}
13unsafe impl Sync for CudaStream {}
14
15impl CudaStream {
16 pub fn new(device_id: i32) -> anyhow::Result<Self> {
17 #[cfg(all(feature = "cuda", cuda_runtime_available))]
18 {
19 let _ = device_id;
20 Err(anyhow::anyhow!("CUDA support needs proper implementation"))
23 }
24
25 #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
26 {
27 Ok(Self {
28 handle: std::ptr::null_mut(),
29 device_id,
30 })
31 }
32 }
33
34 pub fn synchronize(&self) -> anyhow::Result<()> {
35 #[cfg(all(feature = "cuda", cuda_runtime_available))]
36 {
37 Err(anyhow::anyhow!("CUDA support needs proper implementation"))
39 }
40
41 #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
42 {
43 Ok(())
44 }
45 }
46}
47
48impl Drop for CudaStream {
49 fn drop(&mut self) {
50 #[cfg(all(feature = "cuda", cuda_runtime_available))]
51 {
52 }
54 }
55}
56
57#[derive(Debug)]
59pub struct CudaKernel {
60 pub function: *mut std::ffi::c_void,
61 pub module: *mut std::ffi::c_void,
62 pub name: String,
63}
64
65unsafe impl Send for CudaKernel {}
66unsafe impl Sync for CudaKernel {}
67
68impl CudaKernel {
69 #[allow(unused_variables)]
70 pub fn load(ptx_code: &str, function_name: &str) -> anyhow::Result<Self> {
71 #[cfg(all(feature = "cuda", cuda_runtime_available))]
72 {
73 Err(anyhow::anyhow!("CUDA support needs proper implementation"))
75 }
76
77 #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
78 {
79 Ok(Self {
80 function: std::ptr::null_mut(),
81 module: std::ptr::null_mut(),
82 name: function_name.to_string(),
83 })
84 }
85 }
86}
87
88impl Drop for CudaKernel {
89 fn drop(&mut self) {
90 #[cfg(all(feature = "cuda", cuda_runtime_available))]
91 {
92 }
94 }
95}
96
/// Running counters for GPU work: how many operations and memory transfers
/// were recorded, how much time they took in total, and current/peak memory
/// usage.
///
/// Every field starts at zero (via [`Default`]) and is public, so callers may
/// also read or set the counters directly.
#[derive(Debug, Default, Clone)]
pub struct GpuPerformanceStats {
    /// Number of calls to [`record_operation`](Self::record_operation).
    pub total_operations: u64,
    /// Sum of all durations passed to `record_operation`.
    pub total_compute_time: Duration,
    /// Number of calls to [`record_transfer`](Self::record_transfer).
    pub total_memory_transfers: u64,
    /// Sum of all durations passed to `record_transfer`.
    pub total_transfer_time: Duration,
    /// Largest value ever passed to `update_memory_usage`.
    pub peak_memory_usage: usize,
    /// Most recent value passed to `update_memory_usage`
    /// (unit is whatever the caller reports — presumably bytes; confirm).
    pub current_memory_usage: usize,
}

impl GpuPerformanceStats {
    /// Creates a statistics record with every counter at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Counts one operation and adds `compute_time` to the compute total.
    pub fn record_operation(&mut self, compute_time: Duration) {
        self.total_operations += 1;
        self.total_compute_time += compute_time;
    }

    /// Counts one memory transfer and adds `transfer_time` to the transfer total.
    pub fn record_transfer(&mut self, transfer_time: Duration) {
        self.total_memory_transfers += 1;
        self.total_transfer_time += transfer_time;
    }

    /// Stores `current` as the current memory usage and raises the recorded
    /// peak if `current` exceeds it.
    pub fn update_memory_usage(&mut self, current: usize) {
        self.current_memory_usage = current;
        self.peak_memory_usage = self.peak_memory_usage.max(current);
    }

    /// Mean compute time per recorded operation, or [`Duration::ZERO`] when
    /// no operation has been recorded.
    pub fn average_compute_time(&self) -> Duration {
        Self::mean(self.total_compute_time, self.total_operations)
    }

    /// Mean time per recorded memory transfer, or [`Duration::ZERO`] when no
    /// transfer has been recorded.
    pub fn average_transfer_time(&self) -> Duration {
        Self::mean(self.total_transfer_time, self.total_memory_transfers)
    }

    /// Operations per second of accumulated compute time; `0.0` when the
    /// accumulated compute time is zero.
    pub fn throughput_ops_per_sec(&self) -> f64 {
        if self.total_compute_time.is_zero() {
            return 0.0;
        }
        self.total_operations as f64 / self.total_compute_time.as_secs_f64()
    }

    /// Divides `total` by `count` (rounding down to whole nanoseconds),
    /// returning [`Duration::ZERO`] for a zero count.
    ///
    /// The division uses the full `u64` count. Narrowing the count to `u32`
    /// (as `Duration / u32` would require) truncates: a count that is a
    /// non-zero multiple of 2^32 becomes 0 and panics on division, and any
    /// other count above `u32::MAX` yields a wrong average.
    fn mean(total: Duration, count: u64) -> Duration {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        if count == 0 {
            return Duration::ZERO;
        }
        let mean_nanos = total.as_nanos() / u128::from(count);
        // The mean never exceeds `total`, so the whole seconds fit in `u64`;
        // the remainder is below 1e9 and therefore fits in `u32`.
        Duration::new(
            (mean_nanos / NANOS_PER_SEC) as u64,
            (mean_nanos % NANOS_PER_SEC) as u32,
        )
    }
}