oxirs-vec 0.2.4

//! CUDA runtime types and utilities

use std::time::Duration;

/// CUDA stream wrapper
#[derive(Debug)]
pub struct CudaStream {
    pub handle: *mut std::ffi::c_void,
    pub device_id: i32,
}

unsafe impl Send for CudaStream {}
unsafe impl Sync for CudaStream {}

impl CudaStream {
    pub fn new(device_id: i32) -> anyhow::Result<Self> {
        #[cfg(all(feature = "cuda", cuda_runtime_available))]
        {
            let _ = device_id;
            // CUDA functionality temporarily disabled for compilation compatibility
            // TODO: Implement proper CUDA runtime integration
            Err(anyhow::anyhow!("CUDA support needs proper implementation"))
        }

        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
        {
            Ok(Self {
                handle: std::ptr::null_mut(),
                device_id,
            })
        }
    }

    pub fn synchronize(&self) -> anyhow::Result<()> {
        #[cfg(all(feature = "cuda", cuda_runtime_available))]
        {
            // CUDA functionality temporarily disabled
            Err(anyhow::anyhow!("CUDA support needs proper implementation"))
        }

        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
        {
            Ok(())
        }
    }
}

impl Drop for CudaStream {
    fn drop(&mut self) {
        #[cfg(all(feature = "cuda", cuda_runtime_available))]
        {
            // CUDA cleanup temporarily disabled
        }
    }
}

/// CUDA kernel wrapper
#[derive(Debug)]
pub struct CudaKernel {
    pub function: *mut std::ffi::c_void,
    pub module: *mut std::ffi::c_void,
    pub name: String,
}

unsafe impl Send for CudaKernel {}
unsafe impl Sync for CudaKernel {}

impl CudaKernel {
    #[allow(unused_variables)]
    pub fn load(ptx_code: &str, function_name: &str) -> anyhow::Result<Self> {
        #[cfg(all(feature = "cuda", cuda_runtime_available))]
        {
            // CUDA functionality temporarily disabled for compilation compatibility
            Err(anyhow::anyhow!("CUDA support needs proper implementation"))
        }

        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
        {
            Ok(Self {
                function: std::ptr::null_mut(),
                module: std::ptr::null_mut(),
                name: function_name.to_string(),
            })
        }
    }
}

impl Drop for CudaKernel {
    fn drop(&mut self) {
        #[cfg(all(feature = "cuda", cuda_runtime_available))]
        {
            // CUDA cleanup temporarily disabled
        }
    }
}

/// GPU performance statistics
#[derive(Debug, Default, Clone)]
pub struct GpuPerformanceStats {
    pub total_operations: u64,
    pub total_compute_time: Duration,
    pub total_memory_transfers: u64,
    pub total_transfer_time: Duration,
    pub peak_memory_usage: usize,
    pub current_memory_usage: usize,
}

impl GpuPerformanceStats {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn record_operation(&mut self, compute_time: Duration) {
        self.total_operations += 1;
        self.total_compute_time += compute_time;
    }

    pub fn record_transfer(&mut self, transfer_time: Duration) {
        self.total_memory_transfers += 1;
        self.total_transfer_time += transfer_time;
    }

    pub fn update_memory_usage(&mut self, current: usize) {
        self.current_memory_usage = current;
        if current > self.peak_memory_usage {
            self.peak_memory_usage = current;
        }
    }

    pub fn average_compute_time(&self) -> Duration {
        if self.total_operations > 0 {
            self.total_compute_time / self.total_operations as u32
        } else {
            Duration::ZERO
        }
    }

    pub fn average_transfer_time(&self) -> Duration {
        if self.total_memory_transfers > 0 {
            self.total_transfer_time / self.total_memory_transfers as u32
        } else {
            Duration::ZERO
        }
    }

    pub fn throughput_ops_per_sec(&self) -> f64 {
        if !self.total_compute_time.is_zero() {
            self.total_operations as f64 / self.total_compute_time.as_secs_f64()
        } else {
            0.0
        }
    }
}