1use crate::tensor::Tensor;
11use crate::error::{GhostError, Result};
12
/// Compute backend a [`HardwareDevice`] is bound to.
///
/// The value is a plain tag: it is `Copy`, comparable and hashable, so it can
/// be used directly as a `HashMap` / `HashSet` key (for example to group the
/// result of `list_devices` per backend).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HardwareBackend {
    /// Host CPU. Always available; see `HardwareDevice::cpu`.
    CPU,
    /// CUDA backend; only constructible when the `cuda` feature is compiled in.
    CUDA,
    /// ROCm backend; only constructible when the `rocm` feature is compiled in.
    ROCm,
    /// Metal backend; only constructible when the `metal` feature is compiled in.
    Metal,
    /// TPU backend; only constructible when the `tpu` feature is compiled in.
    TPU,
}
27
28#[derive(Debug, Clone)]
30pub struct HardwareDevice {
31 pub backend: HardwareBackend,
33 pub device_id: usize,
35 pub name: String,
37 pub total_memory: usize,
39 pub available_memory: usize,
41 pub compute_capability: Option<(u32, u32)>,
43}
44
45impl HardwareDevice {
46 pub fn cpu() -> Self {
48 HardwareDevice {
49 backend: HardwareBackend::CPU,
50 device_id: 0,
51 name: "CPU".to_string(),
52 total_memory: 0,
53 available_memory: 0,
54 compute_capability: None,
55 }
56 }
57
58 pub fn cuda(device_id: usize) -> Result<Self> {
60 #[cfg(feature = "cuda")]
61 {
62 Ok(HardwareDevice {
64 backend: HardwareBackend::CUDA,
65 device_id,
66 name: format!("CUDA Device {}", device_id),
67 total_memory: 0, available_memory: 0,
69 compute_capability: Some((7, 5)), })
71 }
72 #[cfg(not(feature = "cuda"))]
73 {
74 Err(GhostError::DeviceError("CUDA support not compiled".to_string()))
75 }
76 }
77
78 pub fn rocm(device_id: usize) -> Result<Self> {
80 #[cfg(feature = "rocm")]
81 {
82 Ok(HardwareDevice {
83 backend: HardwareBackend::ROCm,
84 device_id,
85 name: format!("ROCm Device {}", device_id),
86 total_memory: 0,
87 available_memory: 0,
88 compute_capability: None,
89 })
90 }
91 #[cfg(not(feature = "rocm"))]
92 {
93 Err(GhostError::DeviceError("ROCm support not compiled".to_string()))
94 }
95 }
96
97 pub fn metal(device_id: usize) -> Result<Self> {
99 #[cfg(feature = "metal")]
100 {
101 Ok(HardwareDevice {
102 backend: HardwareBackend::Metal,
103 device_id,
104 name: format!("Metal Device {}", device_id),
105 total_memory: 0,
106 available_memory: 0,
107 compute_capability: None,
108 })
109 }
110 #[cfg(not(feature = "metal"))]
111 {
112 Err(GhostError::DeviceError("Metal support not compiled".to_string()))
113 }
114 }
115
116 pub fn tpu(device_id: usize) -> Result<Self> {
118 #[cfg(feature = "tpu")]
119 {
120 Ok(HardwareDevice {
121 backend: HardwareBackend::TPU,
122 device_id,
123 name: format!("TPU Device {}", device_id),
124 total_memory: 0,
125 available_memory: 0,
126 compute_capability: None,
127 })
128 }
129 #[cfg(not(feature = "tpu"))]
130 {
131 Err(GhostError::DeviceError("TPU support not compiled".to_string()))
132 }
133 }
134}
135
136pub fn list_devices() -> Vec<HardwareDevice> {
138 let mut devices = vec![HardwareDevice::cpu()];
139
140 #[cfg(feature = "cuda")]
142 {
143 match crate::cuda::get_device_count() {
144 Ok(count) => {
145 for i in 0..count {
146 if let Ok(device) = HardwareDevice::cuda(i) {
147 devices.push(device);
148 }
149 }
150 }
151 Err(_) => {}
152 }
153 }
154
155 #[cfg(feature = "rocm")]
157 {
158 match crate::rocm::RocmDevice::device_count() {
159 Ok(count) => {
160 for i in 0..count {
161 if let Ok(device) = HardwareDevice::rocm(i) {
162 devices.push(device);
163 }
164 }
165 }
166 Err(_) => {}
167 }
168 }
169
170 #[cfg(feature = "metal")]
172 {
173 match crate::metal::MetalDevice::device_count() {
174 Ok(count) => {
175 for i in 0..count {
176 if let Ok(device) = HardwareDevice::metal(i) {
177 devices.push(device);
178 }
179 }
180 }
181 Err(_) => {}
182 }
183 }
184
185 #[cfg(feature = "tpu")]
187 {
188 match crate::tpu::TpuDevice::device_count() {
189 Ok(count) => {
190 for i in 0..count {
191 if let Ok(device) = HardwareDevice::tpu(i) {
192 devices.push(device);
193 }
194 }
195 }
196 Err(_) => {}
197 }
198 }
199
200 devices
201}
202
/// Stub CUDA device count: always reports exactly one device.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `list_devices` asks `crate::cuda::get_device_count()` instead.
#[cfg(feature = "cuda")]
fn cuda_device_count() -> Result<usize> {
    // Fixed value; the hardware is not inspected.
    const STUB_DEVICE_COUNT: usize = 1;
    Ok(STUB_DEVICE_COUNT)
}
209
/// Stub ROCm device count: always reports exactly one device.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `list_devices` asks `crate::rocm::RocmDevice::device_count()` instead.
#[cfg(feature = "rocm")]
fn rocm_device_count() -> Result<usize> {
    // Fixed value; the hardware is not inspected.
    const STUB_DEVICE_COUNT: usize = 1;
    Ok(STUB_DEVICE_COUNT)
}
215
/// Stub Metal device count: always reports exactly one device.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `list_devices` asks `crate::metal::MetalDevice::device_count()` instead.
#[cfg(feature = "metal")]
fn metal_device_count() -> Result<usize> {
    // Fixed value; the hardware is not inspected.
    const STUB_DEVICE_COUNT: usize = 1;
    Ok(STUB_DEVICE_COUNT)
}
221
/// Stub TPU device count: always reports exactly one device.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `list_devices` asks `crate::tpu::TpuDevice::device_count()` instead.
#[cfg(feature = "tpu")]
fn tpu_device_count() -> Result<usize> {
    // Fixed value; the hardware is not inspected.
    const STUB_DEVICE_COUNT: usize = 1;
    Ok(STUB_DEVICE_COUNT)
}
227
228pub trait HardwareOps {
230 fn matmul_hw(&self, other: &Tensor, device: &HardwareDevice) -> Result<Tensor>;
232
233 fn conv2d_hw(&self, kernel: &Tensor, device: &HardwareDevice) -> Result<Tensor>;
235
236 fn elementwise_hw(&self, op: ElementwiseOp, device: &HardwareDevice) -> Result<Tensor>;
238}
239
/// Element-wise operation selector for `HardwareOps::elementwise_hw`.
///
/// Like `HardwareBackend`, the enum is a `Copy` tag that can be compared and
/// hashed, so callers can test `op == ElementwiseOp::ReLU` or key tables by
/// operation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ElementwiseOp {
    /// Binary addition; the single-tensor CPU path rejects it with
    /// "Binary op requires two tensors".
    Add,
    /// Binary multiplication; rejected by the single-tensor CPU path like `Add`.
    Mul,
    /// Rectified linear unit.
    ReLU,
    /// Logistic sigmoid.
    Sigmoid,
    /// Hyperbolic tangent.
    Tanh,
}
248
249impl HardwareOps for Tensor {
250 fn matmul_hw(&self, other: &Tensor, device: &HardwareDevice) -> Result<Tensor> {
251 match device.backend {
252 HardwareBackend::CPU => {
253 #[cfg(target_arch = "aarch64")]
255 {
256 let a_data = self.data_f32();
258 let b_data = other.data_f32();
259 let dims_a = self.dims();
260 let dims_b = other.dims();
261
262 if dims_a.len() != 2 || dims_b.len() != 2 {
263 return Err(GhostError::InvalidShape("matmul requires 2D tensors".to_string()));
264 }
265
266 let (m, k) = (dims_a[0], dims_a[1]);
267 let (k2, n) = (dims_b[0], dims_b[1]);
268
269 if k != k2 {
270 return Err(GhostError::ShapeMismatch {
271 expected: vec![k],
272 got: vec![k2],
273 });
274 }
275
276 let mut result = vec![0.0f32; m * n];
277 crate::neon::matmul_neon(&a_data, &b_data, &mut result, m, n, k);
278 Tensor::from_slice(&result, &[m, n])
279 }
280 #[cfg(not(target_arch = "aarch64"))]
281 {
282 self.matmul(other)
283 }
284 }
285 HardwareBackend::CUDA => {
286 #[cfg(feature = "cuda")]
287 {
288 crate::cuda::ops::matmul_cuda(self, other, device.device_id)
289 }
290 #[cfg(not(feature = "cuda"))]
291 {
292 Err(GhostError::DeviceError("CUDA not available".to_string()))
293 }
294 }
295 HardwareBackend::ROCm => {
296 #[cfg(feature = "rocm")]
297 {
298 crate::rocm::ops::matmul_rocm(self, other, device.device_id)
299 }
300 #[cfg(not(feature = "rocm"))]
301 {
302 Err(GhostError::DeviceError("ROCm not available".to_string()))
303 }
304 }
305 HardwareBackend::Metal => {
306 #[cfg(feature = "metal")]
307 {
308 crate::metal::mps::matmul_mps(self, other, device.device_id)
309 }
310 #[cfg(not(feature = "metal"))]
311 {
312 Err(GhostError::DeviceError("Metal not available".to_string()))
313 }
314 }
315 HardwareBackend::TPU => {
316 #[cfg(feature = "tpu")]
317 {
318 crate::tpu::ops::matmul_tpu(self, other, device.device_id)
319 }
320 #[cfg(not(feature = "tpu"))]
321 {
322 Err(GhostError::DeviceError("TPU not available".to_string()))
323 }
324 }
325 }
326 }
327
328 fn conv2d_hw(&self, kernel: &Tensor, device: &HardwareDevice) -> Result<Tensor> {
329 match device.backend {
331 HardwareBackend::CPU => {
332 Err(GhostError::NotImplemented("CPU conv2d".to_string()))
334 }
335 _ => Err(GhostError::NotImplemented("Hardware conv2d".to_string())),
336 }
337 }
338
339 fn elementwise_hw(&self, op: ElementwiseOp, device: &HardwareDevice) -> Result<Tensor> {
340 match device.backend {
341 HardwareBackend::CPU => {
342 #[cfg(target_arch = "aarch64")]
343 {
344 match op {
346 ElementwiseOp::ReLU => Ok(self.relu_neon()),
347 ElementwiseOp::Sigmoid => {
348 let mut data = self.data_f32();
349 crate::neon::sigmoid_neon(&mut data);
350 Tensor::from_slice(&data, self.dims())
351 }
352 ElementwiseOp::Tanh => Ok(self.tanh()),
353 ElementwiseOp::Add | ElementwiseOp::Mul => {
354 Err(GhostError::InvalidOperation("Binary op requires two tensors".to_string()))
355 }
356 }
357 }
358 #[cfg(not(target_arch = "aarch64"))]
359 {
360 match op {
361 ElementwiseOp::ReLU => Ok(self.relu()),
362 ElementwiseOp::Sigmoid => Ok(self.sigmoid()),
363 ElementwiseOp::Tanh => Ok(self.tanh()),
364 ElementwiseOp::Add | ElementwiseOp::Mul => {
365 Err(GhostError::InvalidOperation("Binary op requires two tensors".to_string()))
366 }
367 }
368 }
369 }
370 HardwareBackend::CUDA => {
371 #[cfg(feature = "cuda")]
372 {
373 match op {
374 ElementwiseOp::ReLU => crate::cuda::ops::relu_cuda(self, device.device_id),
375 _ => Err(GhostError::NotImplemented("CUDA elementwise op".to_string())),
376 }
377 }
378 #[cfg(not(feature = "cuda"))]
379 {
380 Err(GhostError::DeviceError("CUDA not available".to_string()))
381 }
382 }
383 HardwareBackend::ROCm => {
384 #[cfg(feature = "rocm")]
385 {
386 match op {
387 ElementwiseOp::ReLU => crate::rocm::ops::relu_rocm(self, device.device_id),
388 _ => Err(GhostError::NotImplemented("ROCm elementwise op".to_string())),
389 }
390 }
391 #[cfg(not(feature = "rocm"))]
392 {
393 Err(GhostError::DeviceError("ROCm not available".to_string()))
394 }
395 }
396 HardwareBackend::Metal => {
397 #[cfg(feature = "metal")]
398 {
399 match op {
400 ElementwiseOp::ReLU => crate::metal::mps::relu_mps(self, device.device_id),
401 _ => Err(GhostError::NotImplemented("Metal elementwise op".to_string())),
402 }
403 }
404 #[cfg(not(feature = "metal"))]
405 {
406 Err(GhostError::DeviceError("Metal not available".to_string()))
407 }
408 }
409 HardwareBackend::TPU => {
410 Err(GhostError::NotImplemented("TPU elementwise ops".to_string()))
411 }
412 }
413 }
414}
415
/// Placeholder CUDA matmul: ignores its arguments and always fails with
/// `GhostError::NotImplemented`.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `matmul_hw` dispatches to `crate::cuda::ops::matmul_cuda` instead.
#[cfg(feature = "cuda")]
fn cuda_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    let missing = String::from("CUDA matmul");
    Err(GhostError::NotImplemented(missing))
}
421
/// Placeholder ROCm matmul: ignores its arguments and always fails with
/// `GhostError::NotImplemented`.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `matmul_hw` dispatches to `crate::rocm::ops::matmul_rocm` instead.
#[cfg(feature = "rocm")]
fn rocm_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    let missing = String::from("ROCm matmul");
    Err(GhostError::NotImplemented(missing))
}
426
/// Placeholder Metal matmul: ignores its arguments and always fails with
/// `GhostError::NotImplemented`.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `matmul_hw` dispatches to `crate::metal::mps::matmul_mps` instead.
#[cfg(feature = "metal")]
fn metal_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    let missing = String::from("Metal matmul");
    Err(GhostError::NotImplemented(missing))
}
431
/// Placeholder TPU matmul: ignores its arguments and always fails with
/// `GhostError::NotImplemented`.
///
/// NOTE(review): nothing in the visible part of this file calls this function;
/// `matmul_hw` dispatches to `crate::tpu::ops::matmul_tpu` instead.
#[cfg(feature = "tpu")]
fn tpu_matmul(_a: &Tensor, _b: &Tensor, _device_id: usize) -> Result<Tensor> {
    let missing = String::from("TPU matmul");
    Err(GhostError::NotImplemented(missing))
}
436
#[cfg(test)]
mod tests {
    use super::*;

    /// The device list is never empty and always starts with the CPU.
    #[test]
    fn test_list_devices() {
        let found = list_devices();
        let first = found.first();
        assert!(first.is_some());
        assert_eq!(first.map(|dev| dev.backend), Some(HardwareBackend::CPU));
    }

    /// The CPU descriptor uses the CPU backend and device index 0.
    #[test]
    fn test_cpu_device() {
        let HardwareDevice {
            backend, device_id, ..
        } = HardwareDevice::cpu();
        assert_eq!(backend, HardwareBackend::CPU);
        assert_eq!(device_id, 0);
    }
}