1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
//! Compute device abstraction.
use serde::{Deserialize, Serialize};
use super::apple::AppleSiliconInfo;
use super::cpu::CpuInfo;
use super::gpu::GpuInfo;
use super::tpu::TpuInfo;
/// Compute device abstraction.
///
/// Each variant wraps a device-specific detail struct. Devices are
/// auto-discovered via [`ComputeDevice::detect`]; serde uses the default
/// externally tagged representation for (de)serialization.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ComputeDevice {
    /// Conventional CPU (always present on the host).
    Cpu(CpuInfo),
    /// Discrete or integrated GPU (NVIDIA, AMD, Intel).
    Gpu(GpuInfo),
    /// TPU device (Google).
    Tpu(TpuInfo),
    /// Apple Silicon SoC with unified CPU/GPU memory.
    AppleSilicon(AppleSiliconInfo),
}
impl ComputeDevice {
    /// Enumerate the compute devices visible on this host.
    ///
    /// The CPU is always reported; Apple Silicon is appended when detected.
    /// GPU/TPU discovery requires vendor APIs (CUDA, ROCm, Metal) and is not
    /// performed here yet.
    pub fn detect() -> Vec<Self> {
        let cpu = Self::Cpu(CpuInfo::detect());
        // Mandatory CPU entry first, then the optional Apple Silicon entry.
        std::iter::once(cpu)
            .chain(AppleSiliconInfo::detect().map(Self::AppleSilicon))
            .collect()
    }

    /// Returns `true` if this device is a GPU.
    pub fn is_gpu(&self) -> bool {
        matches!(self, Self::Gpu(_))
    }

    /// Returns `true` if this device is a CPU.
    pub fn is_cpu(&self) -> bool {
        matches!(self, Self::Cpu(_))
    }

    /// Returns `true` if this device is a TPU.
    pub fn is_tpu(&self) -> bool {
        matches!(self, Self::Tpu(_))
    }

    /// Returns `true` if this device is an Apple Silicon SoC.
    pub fn is_apple_silicon(&self) -> bool {
        matches!(self, Self::AppleSilicon(_))
    }

    /// Memory capacity of the device, in bytes.
    ///
    /// For CPUs this is an approximation (no direct system-RAM query yet):
    /// roughly 4 GiB per physical core.
    pub fn memory_bytes(&self) -> u64 {
        const GIB: u64 = 1024 * 1024 * 1024;
        match self {
            Self::Cpu(cpu) => u64::from(cpu.cores) * 4 * GIB,
            Self::Gpu(gpu) => gpu.vram_bytes,
            Self::Tpu(tpu) => tpu.hbm_bytes,
            Self::AppleSilicon(soc) => soc.unified_memory_bytes,
        }
    }

    /// Human-readable device name.
    pub fn name(&self) -> &str {
        match self {
            Self::Cpu(cpu) => &cpu.model,
            Self::Gpu(gpu) => &gpu.name,
            Self::Tpu(tpu) => &tpu.version,
            Self::AppleSilicon(soc) => &soc.chip,
        }
    }

    /// Number of compute cores/units exposed by the device.
    pub fn compute_units(&self) -> u32 {
        match self {
            Self::Cpu(cpu) => cpu.threads,
            // GpuInfo does not carry a core count yet.
            Self::Gpu(_) => 0,
            Self::Tpu(tpu) => tpu.cores,
            Self::AppleSilicon(soc) => soc.total_cpu_cores() + soc.gpu_cores,
        }
    }

    /// Rough relative throughput, normalized so an 8-thread CPU ~= 1.0.
    pub fn relative_compute_power(&self) -> f64 {
        match self {
            // Scale linearly with thread count against an 8-thread baseline.
            Self::Cpu(cpu) => f64::from(cpu.threads) / 8.0,
            // VRAM as a crude capability proxy: an 8 GB GPU ~ 10x CPU.
            Self::Gpu(gpu) => 10.0 * (gpu.vram_gb() / 8.0),
            // TPUs are highly optimized for matrix ops; weight cores heavily.
            Self::Tpu(tpu) => 50.0 * (f64::from(tpu.cores) / 8.0),
            Self::AppleSilicon(soc) => {
                // Weight performance cores above efficiency cores, then add
                // a per-GPU-core contribution on top.
                let cpu_part = f64::from(soc.p_cores) * 1.5 + f64::from(soc.e_cores) * 0.5;
                cpu_part / 8.0 + f64::from(soc.gpu_cores) * 0.5
            }
        }
    }
}
impl std::fmt::Display for ComputeDevice {
    // One-line, human-readable summary for each device kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Cpu(cpu) => {
                write!(
                    f,
                    "CPU: {} ({} cores, {} threads, {})",
                    cpu.model, cpu.cores, cpu.threads, cpu.simd
                )
            }
            Self::Gpu(gpu) => {
                write!(f, "GPU: {} ({:.1} GB VRAM", gpu.name, gpu.vram_gb())?;
                // Compute capability is NVIDIA-specific and may be absent.
                if let Some((major, minor)) = gpu.compute_capability {
                    write!(f, ", SM {major}.{minor}")?;
                }
                write!(f, ")")
            }
            Self::Tpu(tpu) => {
                write!(
                    f,
                    "TPU: {} ({} cores, {:.1} GB HBM)",
                    tpu.version,
                    tpu.cores,
                    tpu.hbm_gb()
                )
            }
            Self::AppleSilicon(soc) => {
                write!(
                    f,
                    "Apple Silicon: {} ({}P+{}E cores, {} GPU cores, {:.1} GB)",
                    soc.chip,
                    soc.p_cores,
                    soc.e_cores,
                    soc.gpu_cores,
                    soc.unified_memory_gb()
                )
            }
        }
    }
}