oxirs_vec/gpu/
device.rs

1//! GPU device information and management
2
3#[cfg(all(feature = "cuda", cuda_runtime_available))]
4use anyhow::anyhow;
5use anyhow::Result;
6
/// GPU device information
///
/// Snapshot of a single GPU's static properties and memory state, either
/// queried from the CUDA runtime or simulated when CUDA is unavailable.
#[derive(Debug, Clone)]
pub struct GpuDevice {
    /// CUDA device ordinal this struct describes.
    pub device_id: i32,
    /// Human-readable device name (from `cudaDeviceProp::name`, or a
    /// "Simulated GPU N" placeholder without CUDA).
    pub name: String,
    /// CUDA compute capability as `(major, minor)`.
    pub compute_capability: (i32, i32),
    /// Total device memory in bytes.
    pub total_memory: usize,
    /// Free device memory in bytes at the time of the query.
    pub free_memory: usize,
    /// Maximum number of threads allowed in a single block.
    pub max_threads_per_block: i32,
    /// Maximum number of blocks along the grid's first dimension.
    pub max_blocks_per_grid: i32,
    /// Threads per warp (32 on current NVIDIA hardware).
    pub warp_size: i32,
    /// Theoretical peak memory bandwidth in GB/s.
    pub memory_bandwidth: f32,
    /// Rough peak compute estimate in GFLOPS — a heuristic derived from
    /// clock rate and SM count, not the vendor-quoted figure.
    pub peak_flops: f64,
}
21
22impl GpuDevice {
23    /// Get information about a specific GPU device
24    pub fn get_device_info(device_id: i32) -> Result<Self> {
25        #[cfg(all(feature = "cuda", cuda_runtime_available))]
26        {
27            use cuda_runtime_sys::*;
28            unsafe {
29                let result = cudaSetDevice(device_id);
30                if result != cudaError_t::cudaSuccess {
31                    return Err(anyhow!("Failed to set CUDA device {}", device_id));
32                }
33
34                let mut props: cudaDeviceProp = std::mem::zeroed();
35                let result = cudaGetDeviceProperties(&mut props, device_id);
36                if result != cudaError_t::cudaSuccess {
37                    return Err(anyhow!("Failed to get device properties"));
38                }
39
40                let mut free_mem: usize = 0;
41                let mut total_mem: usize = 0;
42                let result = cudaMemGetInfo(&mut free_mem, &mut total_mem);
43                if result != cudaError_t::cudaSuccess {
44                    return Err(anyhow!("Failed to get memory info"));
45                }
46
47                Ok(Self {
48                    device_id,
49                    name: std::ffi::CStr::from_ptr(props.name.as_ptr())
50                        .to_string_lossy()
51                        .to_string(),
52                    compute_capability: (props.major, props.minor),
53                    total_memory: total_mem,
54                    free_memory: free_mem,
55                    max_threads_per_block: props.maxThreadsPerBlock,
56                    max_blocks_per_grid: props.maxGridSize[0],
57                    warp_size: props.warpSize,
58                    memory_bandwidth: props.memoryBusWidth as f32
59                        * props.memoryClockRate as f32
60                        * 2.0
61                        / 8.0
62                        / 1e6,
63                    peak_flops: props.clockRate as f64
64                        * props.multiProcessorCount as f64
65                        * props.maxThreadsPerMultiProcessor as f64
66                        / 1e6,
67                })
68            }
69        }
70
71        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
72        {
73            // Fallback for testing without CUDA or when CUDA toolkit not installed
74            tracing::warn!("CUDA not available - using simulated GPU device");
75            Ok(Self {
76                device_id,
77                name: format!("Simulated GPU {device_id}"),
78                compute_capability: (7, 5), // Simulate modern GPU
79                total_memory: 8 * 1024 * 1024 * 1024, // 8GB
80                free_memory: 6 * 1024 * 1024 * 1024, // 6GB free
81                max_threads_per_block: 1024,
82                max_blocks_per_grid: 65535,
83                warp_size: 32,
84                memory_bandwidth: 900.0, // GB/s
85                peak_flops: 14000.0,     // GFLOPS
86            })
87        }
88    }
89
90    /// Get information about all available GPU devices
91    pub fn get_all_devices() -> Result<Vec<Self>> {
92        #[cfg(all(feature = "cuda", cuda_runtime_available))]
93        {
94            use cuda_runtime_sys::*;
95            unsafe {
96                let mut device_count: i32 = 0;
97                let result = cudaGetDeviceCount(&mut device_count);
98                if result != cudaError_t::cudaSuccess {
99                    return Err(anyhow!("Failed to get device count"));
100                }
101
102                let mut devices = Vec::new();
103                for i in 0..device_count {
104                    if let Ok(device) = Self::get_device_info(i) {
105                        devices.push(device);
106                    }
107                }
108                Ok(devices)
109            }
110        }
111
112        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
113        {
114            // Fallback: simulate 2 GPUs for testing when CUDA not available
115            tracing::warn!("CUDA not available - using simulated GPU devices");
116            Ok(vec![Self::get_device_info(0)?, Self::get_device_info(1)?])
117        }
118    }
119
120    /// Check if this device supports a specific compute capability
121    pub fn supports_compute_capability(&self, major: i32, minor: i32) -> bool {
122        self.compute_capability.0 > major
123            || (self.compute_capability.0 == major && self.compute_capability.1 >= minor)
124    }
125
126    /// Get theoretical peak memory bandwidth in GB/s
127    pub fn peak_memory_bandwidth(&self) -> f32 {
128        self.memory_bandwidth
129    }
130
131    /// Get theoretical peak compute performance in GFLOPS
132    pub fn peak_compute_performance(&self) -> f64 {
133        self.peak_flops
134    }
135
136    /// Calculate optimal thread block configuration for given problem size
137    pub fn calculate_optimal_block_config(&self, problem_size: usize) -> (i32, i32) {
138        let optimal_threads = (self.max_threads_per_block as f32 * 0.75) as i32; // Use 75% of max
139        let blocks_needed = ((problem_size as f32) / (optimal_threads as f32)).ceil() as i32;
140        let blocks = blocks_needed.min(self.max_blocks_per_grid);
141        (blocks, optimal_threads)
142    }
143}