scirs2_series/gpu_acceleration/
device_manager.rs1use std::fmt::Debug;
7
8use super::config::{GpuBackend, GpuCapabilities, TensorCoresGeneration};
9use crate::error::{Result, TimeSeriesError};
10
11#[derive(Debug)]
13pub struct GpuDeviceManager {
14 devices: Vec<GpuCapabilities>,
16 current_device: Option<usize>,
18}
19
20impl GpuDeviceManager {
21 pub fn new() -> Result<Self> {
23 let mut devices = Vec::new();
25
26 if let Some(cuda_devices) = Self::detect_cuda_devices() {
28 devices.extend(cuda_devices);
29 }
30
31 if let Some(opencl_devices) = Self::detect_opencl_devices() {
33 devices.extend(opencl_devices);
34 }
35
36 if let Some(metal_devices) = Self::detect_metal_devices() {
38 devices.extend(metal_devices);
39 }
40
41 if let Some(rocm_devices) = Self::detect_rocm_devices() {
43 devices.extend(rocm_devices);
44 }
45
46 if devices.is_empty() {
48 devices.push(GpuCapabilities {
49 backend: GpuBackend::CpuFallback,
50 compute_capability: None,
51 memory: Self::get_system_memory(),
52 multiprocessors: Self::get_cpu_cores(),
53 supports_fp16: false,
54 supports_tensor_cores: false,
55 max_threads_per_block: 1,
56 tensor_cores_generation: None,
57 memory_bandwidth: 100.0, tensor_performance: None,
59 });
60 }
61
62 Ok(Self {
63 devices,
64 current_device: Some(0), })
66 }
67
68 pub fn get_devices(&self) -> &[GpuCapabilities] {
70 &self.devices
71 }
72
73 pub fn set_device(&mut self, deviceid: usize) -> Result<()> {
75 if deviceid >= self.devices.len() {
76 return Err(TimeSeriesError::InvalidInput(format!(
77 "Device {deviceid} not available"
78 )));
79 }
80 self.current_device = Some(deviceid);
81 Ok(())
82 }
83
84 pub fn current_device_capabilities(&self) -> Option<&GpuCapabilities> {
86 self.current_device.map(|id| &self.devices[id])
87 }
88
89 pub fn is_gpu_available(&self) -> bool {
91 self.devices
92 .iter()
93 .any(|dev| !matches!(dev.backend, GpuBackend::CpuFallback))
94 }
95
96 fn detect_cuda_devices() -> Option<Vec<GpuCapabilities>> {
98 #[cfg(target_os = "linux")]
101 {
102 if std::path::Path::new("/dev/nvidia0").exists()
103 || std::path::Path::new("/proc/driver/nvidia").exists()
104 {
105 return Some(vec![GpuCapabilities {
106 backend: GpuBackend::Cuda,
107 compute_capability: Some((8, 0)), memory: 40 * 1024 * 1024 * 1024, multiprocessors: 108,
110 supports_fp16: true,
111 supports_tensor_cores: true,
112 max_threads_per_block: 1024,
113 tensor_cores_generation: Some(TensorCoresGeneration::V3), memory_bandwidth: 1555.0, tensor_performance: Some(312.0), }]);
117 }
118 }
119
120 #[cfg(target_os = "windows")]
121 {
122 }
125
126 None
127 }
128
129 fn detect_opencl_devices() -> Option<Vec<GpuCapabilities>> {
131 #[cfg(any(target_os = "linux", target_os = "windows", target_os = "macos"))]
134 {
135 if Self::has_opencl_drivers() {
138 return Some(vec![GpuCapabilities {
139 backend: GpuBackend::OpenCL,
140 compute_capability: None,
141 memory: 8 * 1024 * 1024 * 1024, multiprocessors: 64,
143 supports_fp16: true,
144 supports_tensor_cores: false,
145 max_threads_per_block: 256,
146 tensor_cores_generation: None,
147 memory_bandwidth: 500.0, tensor_performance: None,
149 }]);
150 }
151 }
152
153 None
154 }
155
156 fn detect_metal_devices() -> Option<Vec<GpuCapabilities>> {
158 #[cfg(target_os = "macos")]
159 {
160 if Self::is_apple_silicon() || Self::has_metal_gpu() {
162 return Some(vec![GpuCapabilities {
163 backend: GpuBackend::Metal,
164 compute_capability: None,
165 memory: 16 * 1024 * 1024 * 1024, multiprocessors: 32, supports_fp16: true,
168 supports_tensor_cores: true, max_threads_per_block: 1024,
170 tensor_cores_generation: Some(TensorCoresGeneration::V3), memory_bandwidth: 400.0, tensor_performance: Some(15.8), }]);
174 }
175 }
176
177 None
178 }
179
180 fn detect_rocm_devices() -> Option<Vec<GpuCapabilities>> {
182 #[cfg(target_os = "linux")]
183 {
184 if std::path::Path::new("/opt/rocm").exists()
186 || std::path::Path::new("/dev/kfd").exists()
187 {
188 return Some(vec![GpuCapabilities {
189 backend: GpuBackend::Rocm,
190 compute_capability: None,
191 memory: 32 * 1024 * 1024 * 1024, multiprocessors: 120,
193 supports_fp16: true,
194 supports_tensor_cores: false, max_threads_per_block: 1024,
196 tensor_cores_generation: None, memory_bandwidth: 1600.0, tensor_performance: Some(383.0), }]);
200 }
201 }
202
203 None
204 }
205
/// Reports whether an OpenCL runtime library is present at one of the
/// well-known locations for the current operating system.
///
/// Only fixed paths are checked (listed below per platform); on any other
/// operating system the answer is always `false`.
fn has_opencl_drivers() -> bool {
    #[cfg(target_os = "linux")]
    const CANDIDATES: &[&str] = &[
        "/usr/lib/x86_64-linux-gnu/libOpenCL.so",
        "/usr/lib64/libOpenCL.so",
    ];
    #[cfg(target_os = "windows")]
    const CANDIDATES: &[&str] = &["C:/Windows/System32/OpenCL.dll"];
    #[cfg(target_os = "macos")]
    const CANDIDATES: &[&str] = &["/System/Library/Frameworks/OpenCL.framework"];
    #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
    const CANDIDATES: &[&str] = &[];

    // Stops at the first path that exists.
    CANDIDATES
        .iter()
        .any(|location| std::path::Path::new(location).exists())
}
226
/// Reports whether the build targets macOS on an `aarch64` CPU.
///
/// Always `false` when the target operating system is not macOS.
// Within this file the only caller is the Metal probe, hence the lint allowance.
#[allow(dead_code)]
fn is_apple_silicon() -> bool {
    cfg!(target_os = "macos") && std::env::consts::ARCH == "aarch64"
}
239
/// Reports whether the Metal framework directory exists on a macOS host.
///
/// Always `false` when the target operating system is not macOS; the
/// filesystem is not touched in that case.
// Within this file the only caller is the Metal probe, hence the lint allowance.
#[allow(dead_code)]
fn has_metal_gpu() -> bool {
    cfg!(target_os = "macos")
        && std::path::Path::new("/System/Library/Frameworks/Metal.framework").exists()
}
252
/// Returns the total system memory in bytes.
///
/// On Linux the value comes from the `MemTotal:` entry of `/proc/meminfo`:
/// the second whitespace-separated token of that line is parsed as an
/// integer and multiplied by 1024. If the file cannot be read, or no
/// `MemTotal:` line parses, a fixed 8 GiB default is returned. Other
/// operating systems always get the default.
///
/// Both the multiplication and the default saturate at `usize::MAX`
/// instead of overflowing, so an implausibly large reading or a 32-bit
/// `usize` cannot panic or wrap around.
fn get_system_memory() -> usize {
    #[cfg(target_os = "linux")]
    {
        if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
            // First `MemTotal:` line whose value parses wins; lines that
            // fail to parse are skipped.
            let total_kib = meminfo
                .lines()
                .filter(|line| line.starts_with("MemTotal:"))
                .find_map(|line| line.split_whitespace().nth(1)?.parse::<usize>().ok());

            if let Some(kib) = total_kib {
                return kib.saturating_mul(1024);
            }
        }
    }

    // Default of 8 GiB, clamped to what `usize` can hold on this target.
    8_usize.saturating_mul(1024 * 1024 * 1024)
}
274
/// Returns the parallelism reported by
/// `std::thread::available_parallelism`, or 4 when that query fails.
fn get_cpu_cores() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 4,
    }
}
281}
282
283impl Default for GpuDeviceManager {
284 fn default() -> Self {
285 Self::new().unwrap_or_else(|_| Self {
286 devices: vec![],
287 current_device: None,
288 })
289 }
290}