ghostflow_core/
hardware.rs

//! Hardware abstraction layer
//!
//! Provides a unified interface for different hardware backends:
//! - CUDA (NVIDIA GPUs)
//! - ROCm (AMD GPUs)
//! - Metal (Apple Silicon)
//! - TPU (Google TPUs)
//! - CPU with SIMD (AVX, NEON)
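//!
//! # Example
//!
//! A minimal device-discovery sketch (illustrative; assumes the crate is
//! consumed as `ghostflow_core`):
//!
//! ```ignore
//! use ghostflow_core::hardware::{list_devices, HardwareBackend};
//!
//! let devices = list_devices();
//! // The CPU device is always present at index 0.
//! assert_eq!(devices[0].backend, HardwareBackend::CPU);
//! ```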

use crate::tensor::Tensor;
use crate::error::{GhostError, Result};

/// Hardware backend type
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HardwareBackend {
    /// CPU with optional SIMD
    CPU,
    /// NVIDIA CUDA
    CUDA,
    /// AMD ROCm
    ROCm,
    /// Apple Metal
    Metal,
    /// Google TPU
    TPU,
}

/// Hardware device information
#[derive(Debug, Clone)]
pub struct HardwareDevice {
    /// Backend type
    pub backend: HardwareBackend,
    /// Device ID
    pub device_id: usize,
    /// Device name
    pub name: String,
    /// Total memory in bytes
    pub total_memory: usize,
    /// Available memory in bytes
    pub available_memory: usize,
    /// Compute capability (for CUDA/ROCm)
    pub compute_capability: Option<(u32, u32)>,
}
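
/// Illustrative helper sketch, not part of the original API: the fraction of
/// device memory still available, guarding against the zeroed placeholder
/// totals used throughout this file.
impl HardwareDevice {
    pub fn available_fraction(&self) -> Option<f64> {
        if self.total_memory == 0 {
            // Memory totals are placeholders until the backends query them.
            return None;
        }
        Some(self.available_memory as f64 / self.total_memory as f64)
    }
}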

impl HardwareDevice {
    /// Create a CPU device
    pub fn cpu() -> Self {
        HardwareDevice {
            backend: HardwareBackend::CPU,
            device_id: 0,
            name: "CPU".to_string(),
            total_memory: 0, // Not queried; a real implementation would ask the OS
            available_memory: 0,
            compute_capability: None,
        }
    }

    /// Create a CUDA device
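    ///
    /// Fails with `GhostError::DeviceError` when the `cuda` feature is not
    /// compiled in, so callers can fall back to the CPU. A minimal sketch:
    ///
    /// ```ignore
    /// let device = HardwareDevice::cuda(0).unwrap_or_else(|_| HardwareDevice::cpu());
    /// ```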
    pub fn cuda(device_id: usize) -> Result<Self> {
        #[cfg(feature = "cuda")]
        {
            // Placeholder device info; a real implementation would query the
            // CUDA runtime for name, memory, and compute capability.
            Ok(HardwareDevice {
                backend: HardwareBackend::CUDA,
                device_id,
                name: format!("CUDA Device {}", device_id),
                total_memory: 0, // Would query actual memory
                available_memory: 0,
                compute_capability: Some((7, 5)), // Example value, not queried
            })
        }
        #[cfg(not(feature = "cuda"))]
        {
            let _ = device_id; // avoid an unused-parameter warning without the feature
            Err(GhostError::DeviceError("CUDA support not compiled".to_string()))
        }
    }

    /// Create a ROCm device
    pub fn rocm(device_id: usize) -> Result<Self> {
        #[cfg(feature = "rocm")]
        {
            Ok(HardwareDevice {
                backend: HardwareBackend::ROCm,
                device_id,
                name: format!("ROCm Device {}", device_id),
                total_memory: 0,
                available_memory: 0,
                compute_capability: None,
            })
        }
        #[cfg(not(feature = "rocm"))]
        {
            let _ = device_id; // avoid an unused-parameter warning without the feature
            Err(GhostError::DeviceError("ROCm support not compiled".to_string()))
        }
    }

    /// Create a Metal device
    pub fn metal(device_id: usize) -> Result<Self> {
        #[cfg(feature = "metal")]
        {
            Ok(HardwareDevice {
                backend: HardwareBackend::Metal,
                device_id,
                name: format!("Metal Device {}", device_id),
                total_memory: 0,
                available_memory: 0,
                compute_capability: None,
            })
        }
        #[cfg(not(feature = "metal"))]
        {
            let _ = device_id; // avoid an unused-parameter warning without the feature
            Err(GhostError::DeviceError("Metal support not compiled".to_string()))
        }
    }

    /// Create a TPU device
    pub fn tpu(device_id: usize) -> Result<Self> {
        #[cfg(feature = "tpu")]
        {
            Ok(HardwareDevice {
                backend: HardwareBackend::TPU,
                device_id,
                name: format!("TPU Device {}", device_id),
                total_memory: 0,
                available_memory: 0,
                compute_capability: None,
            })
        }
        #[cfg(not(feature = "tpu"))]
        {
            let _ = device_id; // avoid an unused-parameter warning without the feature
            Err(GhostError::DeviceError("TPU support not compiled".to_string()))
        }
    }
}

/// List available devices
///
/// The CPU device is always present at index 0; accelerator devices are
/// appended when the corresponding feature is compiled in and the backend
/// reports them.
pub fn list_devices() -> Vec<HardwareDevice> {
    let mut devices = vec![HardwareDevice::cpu()];

    // Check for CUDA devices
    #[cfg(feature = "cuda")]
    {
        if let Ok(count) = crate::cuda::get_device_count() {
            for i in 0..count {
                if let Ok(device) = HardwareDevice::cuda(i) {
                    devices.push(device);
                }
            }
        }
    }

    // Check for ROCm devices
    #[cfg(feature = "rocm")]
    {
        if let Ok(count) = crate::rocm::RocmDevice::device_count() {
            for i in 0..count {
                if let Ok(device) = HardwareDevice::rocm(i) {
                    devices.push(device);
                }
            }
        }
    }

    // Check for Metal devices
    #[cfg(feature = "metal")]
    {
        if let Ok(count) = crate::metal::MetalDevice::device_count() {
            for i in 0..count {
                if let Ok(device) = HardwareDevice::metal(i) {
                    devices.push(device);
                }
            }
        }
    }

    // Check for TPU devices
    #[cfg(feature = "tpu")]
    {
        if let Ok(count) = crate::tpu::TpuDevice::device_count() {
            for i in 0..count {
                if let Ok(device) = HardwareDevice::tpu(i) {
                    devices.push(device);
                }
            }
        }
    }

    devices
}
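
/// Pick a preferred device from the available ones.
///
/// A minimal selection sketch added for illustration: it prefers accelerators
/// over the CPU in a fixed, assumed order (CUDA, ROCm, Metal, TPU); the
/// ordering is an editorial assumption, not a benchmark-driven policy.
pub fn default_device() -> HardwareDevice {
    let devices = list_devices();
    for backend in [
        HardwareBackend::CUDA,
        HardwareBackend::ROCm,
        HardwareBackend::Metal,
        HardwareBackend::TPU,
    ] {
        if let Some(device) = devices.iter().find(|d| d.backend == backend) {
            return device.clone();
        }
    }
    // `list_devices` always includes the CPU, so index 0 is safe.
    devices[0].clone()
}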

// Placeholder fallbacks for device counting. `list_devices` currently calls
// the backend modules directly, so these are unused; kept for reference.
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn cuda_device_count() -> Result<usize> {
    // Would use the CUDA API
    Ok(1)
}

#[cfg(feature = "rocm")]
#[allow(dead_code)]
fn rocm_device_count() -> Result<usize> {
    // Would use the ROCm API
    Ok(1)
}

#[cfg(feature = "metal")]
#[allow(dead_code)]
fn metal_device_count() -> Result<usize> {
    // Would use the Metal API
    Ok(1)
}

#[cfg(feature = "tpu")]
#[allow(dead_code)]
fn tpu_device_count() -> Result<usize> {
    // Would use the TPU API
    Ok(1)
}

/// Hardware-accelerated operations trait
pub trait HardwareOps {
    /// Matrix multiplication on hardware
    fn matmul_hw(&self, other: &Tensor, device: &HardwareDevice) -> Result<Tensor>;

    /// Convolution on hardware
    fn conv2d_hw(&self, kernel: &Tensor, device: &HardwareDevice) -> Result<Tensor>;

    /// Element-wise operations on hardware
    fn elementwise_hw(&self, op: ElementwiseOp, device: &HardwareDevice) -> Result<Tensor>;
}

/// Supported element-wise operations
#[derive(Debug, Clone, Copy)]
pub enum ElementwiseOp {
    Add,
    Mul,
    ReLU,
    Sigmoid,
    Tanh,
}

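// Illustrative dispatch sketch (hypothetical 2x2 tensors; assumes
// `Tensor::from_slice(&[f32], &[usize]) -> Result<Tensor>` as used in the
// implementation below):
//
//     let device = HardwareDevice::cpu();
//     let a = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[2, 2])?;
//     let b = Tensor::from_slice(&[5.0f32, 6.0, 7.0, 8.0], &[2, 2])?;
//     let c = a.matmul_hw(&b, &device)?;                       // backend dispatch
//     let r = c.elementwise_hw(ElementwiseOp::ReLU, &device)?; // unary ReLU
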
impl HardwareOps for Tensor {
    fn matmul_hw(&self, other: &Tensor, device: &HardwareDevice) -> Result<Tensor> {
        match device.backend {
            HardwareBackend::CPU => {
                // Use optimized CPU implementation with SIMD if available
                #[cfg(target_arch = "aarch64")]
                {
                    // Use NEON on ARM
                    let a_data = self.data_f32();
                    let b_data = other.data_f32();
                    let dims_a = self.dims();
                    let dims_b = other.dims();

                    if dims_a.len() != 2 || dims_b.len() != 2 {
                        return Err(GhostError::InvalidShape("matmul requires 2D tensors".to_string()));
                    }

                    let (m, k) = (dims_a[0], dims_a[1]);
                    let (k2, n) = (dims_b[0], dims_b[1]);

                    if k != k2 {
                        return Err(GhostError::ShapeMismatch {
                            expected: vec![k],
                            got: vec![k2],
                        });
                    }

                    let mut result = vec![0.0f32; m * n];
                    crate::neon::matmul_neon(&a_data, &b_data, &mut result, m, n, k);
                    Tensor::from_slice(&result, &[m, n])
                }
                #[cfg(not(target_arch = "aarch64"))]
                {
                    self.matmul(other)
                }
            }
            HardwareBackend::CUDA => {
                #[cfg(feature = "cuda")]
                {
                    crate::cuda::ops::matmul_cuda(self, other, device.device_id)
                }
                #[cfg(not(feature = "cuda"))]
                {
                    Err(GhostError::DeviceError("CUDA not available".to_string()))
                }
            }
            HardwareBackend::ROCm => {
                #[cfg(feature = "rocm")]
                {
                    crate::rocm::ops::matmul_rocm(self, other, device.device_id)
                }
                #[cfg(not(feature = "rocm"))]
                {
                    Err(GhostError::DeviceError("ROCm not available".to_string()))
                }
            }
            HardwareBackend::Metal => {
                #[cfg(feature = "metal")]
                {
                    crate::metal::mps::matmul_mps(self, other, device.device_id)
                }
                #[cfg(not(feature = "metal"))]
                {
                    Err(GhostError::DeviceError("Metal not available".to_string()))
                }
            }
            HardwareBackend::TPU => {
                #[cfg(feature = "tpu")]
                {
                    crate::tpu::ops::matmul_tpu(self, other, device.device_id)
                }
                #[cfg(not(feature = "tpu"))]
                {
                    Err(GhostError::DeviceError("TPU not available".to_string()))
                }
            }
        }
    }

    fn conv2d_hw(&self, _kernel: &Tensor, device: &HardwareDevice) -> Result<Tensor> {
        // Similar dispatch logic to matmul_hw; no backend is implemented yet,
        // so the kernel parameter is not used.
        match device.backend {
            HardwareBackend::CPU => {
                Err(GhostError::NotImplemented("CPU conv2d".to_string()))
            }
            _ => Err(GhostError::NotImplemented("Hardware conv2d".to_string())),
        }
    }

    fn elementwise_hw(&self, op: ElementwiseOp, device: &HardwareDevice) -> Result<Tensor> {
        match device.backend {
            HardwareBackend::CPU => {
                #[cfg(target_arch = "aarch64")]
                {
                    // Use NEON optimizations on ARM
                    match op {
                        ElementwiseOp::ReLU => Ok(self.relu_neon()),
                        ElementwiseOp::Sigmoid => {
                            let mut data = self.data_f32();
                            crate::neon::sigmoid_neon(&mut data);
                            Tensor::from_slice(&data, self.dims())
                        }
                        ElementwiseOp::Tanh => Ok(self.tanh()),
                        ElementwiseOp::Add | ElementwiseOp::Mul => {
                            Err(GhostError::InvalidOperation("Binary op requires two tensors".to_string()))
                        }
                    }
                }
                #[cfg(not(target_arch = "aarch64"))]
                {
                    match op {
                        ElementwiseOp::ReLU => Ok(self.relu()),
                        ElementwiseOp::Sigmoid => Ok(self.sigmoid()),
                        ElementwiseOp::Tanh => Ok(self.tanh()),
                        ElementwiseOp::Add | ElementwiseOp::Mul => {
                            Err(GhostError::InvalidOperation("Binary op requires two tensors".to_string()))
                        }
                    }
                }
            }
            HardwareBackend::CUDA => {
                #[cfg(feature = "cuda")]
                {
                    match op {
                        ElementwiseOp::ReLU => crate::cuda::ops::relu_cuda(self, device.device_id),
                        _ => Err(GhostError::NotImplemented("CUDA elementwise op".to_string())),
                    }
                }
                #[cfg(not(feature = "cuda"))]
                {
                    Err(GhostError::DeviceError("CUDA not available".to_string()))
                }
            }
            HardwareBackend::ROCm => {
                #[cfg(feature = "rocm")]
                {
                    match op {
                        ElementwiseOp::ReLU => crate::rocm::ops::relu_rocm(self, device.device_id),
                        _ => Err(GhostError::NotImplemented("ROCm elementwise op".to_string())),
                    }
                }
                #[cfg(not(feature = "rocm"))]
                {
                    Err(GhostError::DeviceError("ROCm not available".to_string()))
                }
            }
            HardwareBackend::Metal => {
                #[cfg(feature = "metal")]
                {
                    match op {
                        ElementwiseOp::ReLU => crate::metal::mps::relu_mps(self, device.device_id),
                        _ => Err(GhostError::NotImplemented("Metal elementwise op".to_string())),
                    }
                }
                #[cfg(not(feature = "metal"))]
                {
                    Err(GhostError::DeviceError("Metal not available".to_string()))
                }
            }
            HardwareBackend::TPU => {
                Err(GhostError::NotImplemented("TPU elementwise ops".to_string()))
            }
        }
    }
}

// Placeholder implementations for hardware-specific operations. The dispatch
// in `matmul_hw` calls the backend `ops` modules directly, so these are
// unused; kept as stubs until the backends land.
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn cuda_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    Err(GhostError::NotImplemented("CUDA matmul".to_string()))
}

#[cfg(feature = "rocm")]
#[allow(dead_code)]
fn rocm_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    Err(GhostError::NotImplemented("ROCm matmul".to_string()))
}

#[cfg(feature = "metal")]
#[allow(dead_code)]
fn metal_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    Err(GhostError::NotImplemented("Metal matmul".to_string()))
}

#[cfg(feature = "tpu")]
#[allow(dead_code)]
fn tpu_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    Err(GhostError::NotImplemented("TPU matmul".to_string()))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_list_devices() {
        let devices = list_devices();
        assert!(!devices.is_empty());
        assert_eq!(devices[0].backend, HardwareBackend::CPU);
    }

    #[test]
    fn test_cpu_device() {
        let device = HardwareDevice::cpu();
        assert_eq!(device.backend, HardwareBackend::CPU);
        assert_eq!(device.device_id, 0);
    }
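
    #[test]
    fn test_default_device_is_listed() {
        // Exercises the illustrative `default_device` sketch above: whatever
        // it picks must appear in `list_devices`.
        let device = default_device();
        assert!(list_devices().iter().any(|d| d.backend == device.backend));
    }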
}