// oxirs_vec/gpu/device.rs
//! GPU device information and management

#[cfg(all(feature = "cuda", cuda_runtime_available))]
use anyhow::anyhow;
use anyhow::Result;

/// Information about a single GPU device (real or simulated).
///
/// Populated either from the CUDA runtime (when the `cuda` feature is enabled
/// and a runtime is available) or from a simulated placeholder device.
#[derive(Debug, Clone)]
pub struct GpuDevice {
    /// Ordinal used to select this device via the CUDA runtime.
    pub device_id: i32,
    /// Human-readable device name.
    pub name: String,
    /// CUDA compute capability as (major, minor).
    pub compute_capability: (i32, i32),
    /// Total device memory in bytes.
    pub total_memory: usize,
    /// Free device memory in bytes (a snapshot taken at query time).
    pub free_memory: usize,
    /// Maximum number of threads in a single thread block.
    pub max_threads_per_block: i32,
    /// Maximum number of blocks along the first grid dimension.
    pub max_blocks_per_grid: i32,
    /// Number of threads per warp.
    pub warp_size: i32,
    /// Theoretical peak memory bandwidth in GB/s.
    pub memory_bandwidth: f32,
    /// Estimated peak compute throughput in GFLOPS.
    pub peak_flops: f64,
}
21
22impl GpuDevice {
23    /// Create a simulated GPU device for testing or when CUDA device is unavailable
24    fn simulated(device_id: i32) -> Self {
25        Self {
26            device_id,
27            name: format!("Simulated GPU {device_id}"),
28            compute_capability: (7, 5),
29            total_memory: 8 * 1024 * 1024 * 1024,
30            free_memory: 6 * 1024 * 1024 * 1024,
31            max_threads_per_block: 1024,
32            max_blocks_per_grid: 65535,
33            warp_size: 32,
34            memory_bandwidth: 900.0,
35            peak_flops: 14000.0,
36        }
37    }
38
39    /// Get information about a specific GPU device
40    pub fn get_device_info(device_id: i32) -> Result<Self> {
41        #[cfg(all(feature = "cuda", cuda_runtime_available))]
42        {
43            use cuda_runtime_sys::*;
44            unsafe {
45                let result = cudaSetDevice(device_id);
46                if result != cudaError_t::cudaSuccess {
47                    // Device not available - fall back to simulated device
48                    tracing::warn!(
49                        "CUDA device {} not available - using simulated GPU device",
50                        device_id
51                    );
52                    return Ok(Self::simulated(device_id));
53                }
54
55                let mut props: cudaDeviceProp = std::mem::zeroed();
56                let result = cudaGetDeviceProperties(&mut props, device_id);
57                if result != cudaError_t::cudaSuccess {
58                    tracing::warn!(
59                        "Failed to get properties for CUDA device {} - using simulated GPU device",
60                        device_id
61                    );
62                    return Ok(Self::simulated(device_id));
63                }
64
65                let mut free_mem: usize = 0;
66                let mut total_mem: usize = 0;
67                let result = cudaMemGetInfo(&mut free_mem, &mut total_mem);
68                if result != cudaError_t::cudaSuccess {
69                    tracing::warn!(
70                        "Failed to get memory info for CUDA device {} - using simulated GPU device",
71                        device_id
72                    );
73                    return Ok(Self::simulated(device_id));
74                }
75
76                Ok(Self {
77                    device_id,
78                    name: std::ffi::CStr::from_ptr(props.name.as_ptr())
79                        .to_string_lossy()
80                        .to_string(),
81                    compute_capability: (props.major, props.minor),
82                    total_memory: total_mem,
83                    free_memory: free_mem,
84                    max_threads_per_block: props.maxThreadsPerBlock,
85                    max_blocks_per_grid: props.maxGridSize[0],
86                    warp_size: props.warpSize,
87                    memory_bandwidth: props.memoryBusWidth as f32
88                        * props.memoryClockRate as f32
89                        * 2.0
90                        / 8.0
91                        / 1e6,
92                    peak_flops: props.clockRate as f64
93                        * props.multiProcessorCount as f64
94                        * props.maxThreadsPerMultiProcessor as f64
95                        / 1e6,
96                })
97            }
98        }
99
100        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
101        {
102            // Fallback for testing without CUDA or when CUDA toolkit not installed
103            tracing::warn!("CUDA not available - using simulated GPU device");
104            Ok(Self::simulated(device_id))
105        }
106    }
107
108    /// Get information about all available GPU devices
109    pub fn get_all_devices() -> Result<Vec<Self>> {
110        #[cfg(all(feature = "cuda", cuda_runtime_available))]
111        {
112            use cuda_runtime_sys::*;
113            unsafe {
114                let mut device_count: i32 = 0;
115                let result = cudaGetDeviceCount(&mut device_count);
116                if result != cudaError_t::cudaSuccess {
117                    return Err(anyhow!("Failed to get device count"));
118                }
119
120                let mut devices = Vec::new();
121                for i in 0..device_count {
122                    if let Ok(device) = Self::get_device_info(i) {
123                        devices.push(device);
124                    }
125                }
126                Ok(devices)
127            }
128        }
129
130        #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
131        {
132            // Fallback: simulate 2 GPUs for testing when CUDA not available
133            tracing::warn!("CUDA not available - using simulated GPU devices");
134            Ok(vec![Self::get_device_info(0)?, Self::get_device_info(1)?])
135        }
136    }
137
138    /// Check if this device supports a specific compute capability
139    pub fn supports_compute_capability(&self, major: i32, minor: i32) -> bool {
140        self.compute_capability.0 > major
141            || (self.compute_capability.0 == major && self.compute_capability.1 >= minor)
142    }
143
144    /// Get theoretical peak memory bandwidth in GB/s
145    pub fn peak_memory_bandwidth(&self) -> f32 {
146        self.memory_bandwidth
147    }
148
149    /// Get theoretical peak compute performance in GFLOPS
150    pub fn peak_compute_performance(&self) -> f64 {
151        self.peak_flops
152    }
153
154    /// Calculate optimal thread block configuration for given problem size
155    pub fn calculate_optimal_block_config(&self, problem_size: usize) -> (i32, i32) {
156        let optimal_threads = (self.max_threads_per_block as f32 * 0.75) as i32; // Use 75% of max
157        let blocks_needed = ((problem_size as f32) / (optimal_threads as f32)).ceil() as i32;
158        let blocks = blocks_needed.min(self.max_blocks_per_grid);
159        (blocks, optimal_threads)
160    }
161}