use std::time::Duration;
#[derive(Debug)]
pub struct CudaStream {
pub handle: *mut std::ffi::c_void,
pub device_id: i32,
}
unsafe impl Send for CudaStream {}
unsafe impl Sync for CudaStream {}
impl CudaStream {
pub fn new(device_id: i32) -> anyhow::Result<Self> {
#[cfg(all(feature = "cuda", cuda_runtime_available))]
{
let _ = device_id;
Err(anyhow::anyhow!("CUDA support needs proper implementation"))
}
#[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
{
Ok(Self {
handle: std::ptr::null_mut(),
device_id,
})
}
}
pub fn synchronize(&self) -> anyhow::Result<()> {
#[cfg(all(feature = "cuda", cuda_runtime_available))]
{
Err(anyhow::anyhow!("CUDA support needs proper implementation"))
}
#[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
{
Ok(())
}
}
}
impl Drop for CudaStream {
fn drop(&mut self) {
#[cfg(all(feature = "cuda", cuda_runtime_available))]
{
}
}
}
#[derive(Debug)]
pub struct CudaKernel {
pub function: *mut std::ffi::c_void,
pub module: *mut std::ffi::c_void,
pub name: String,
}
unsafe impl Send for CudaKernel {}
unsafe impl Sync for CudaKernel {}
impl CudaKernel {
#[allow(unused_variables)]
pub fn load(ptx_code: &str, function_name: &str) -> anyhow::Result<Self> {
#[cfg(all(feature = "cuda", cuda_runtime_available))]
{
Err(anyhow::anyhow!("CUDA support needs proper implementation"))
}
#[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
{
Ok(Self {
function: std::ptr::null_mut(),
module: std::ptr::null_mut(),
name: function_name.to_string(),
})
}
}
}
impl Drop for CudaKernel {
fn drop(&mut self) {
#[cfg(all(feature = "cuda", cuda_runtime_available))]
{
}
}
}
#[derive(Debug, Default, Clone)]
pub struct GpuPerformanceStats {
pub total_operations: u64,
pub total_compute_time: Duration,
pub total_memory_transfers: u64,
pub total_transfer_time: Duration,
pub peak_memory_usage: usize,
pub current_memory_usage: usize,
}
impl GpuPerformanceStats {
pub fn new() -> Self {
Self::default()
}
pub fn record_operation(&mut self, compute_time: Duration) {
self.total_operations += 1;
self.total_compute_time += compute_time;
}
pub fn record_transfer(&mut self, transfer_time: Duration) {
self.total_memory_transfers += 1;
self.total_transfer_time += transfer_time;
}
pub fn update_memory_usage(&mut self, current: usize) {
self.current_memory_usage = current;
if current > self.peak_memory_usage {
self.peak_memory_usage = current;
}
}
pub fn average_compute_time(&self) -> Duration {
if self.total_operations > 0 {
self.total_compute_time / self.total_operations as u32
} else {
Duration::ZERO
}
}
pub fn average_transfer_time(&self) -> Duration {
if self.total_memory_transfers > 0 {
self.total_transfer_time / self.total_memory_transfers as u32
} else {
Duration::ZERO
}
}
pub fn throughput_ops_per_sec(&self) -> f64 {
if !self.total_compute_time.is_zero() {
self.total_operations as f64 / self.total_compute_time.as_secs_f64()
} else {
0.0
}
}
}