mod gaudi;
mod neuron;
mod tpu;
pub use gaudi::GaudiGeneration;
pub use neuron::NeuronChipType;
pub use tpu::TpuVersion;
use std::fmt;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum AcceleratorFamily {
Cpu,
Gpu,
Npu,
Tpu,
AiAsic,
}
impl fmt::Display for AcceleratorFamily {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Cpu => write!(f, "CPU"),
Self::Gpu => write!(f, "GPU"),
Self::Npu => write!(f, "NPU"),
Self::Tpu => write!(f, "TPU"),
Self::AiAsic => write!(f, "AI ASIC"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum AcceleratorType {
Cpu,
CudaGpu { device_id: u32 },
RocmGpu { device_id: u32 },
MetalGpu,
VulkanGpu { device_id: u32 },
IntelNpu,
AmdXdnaNpu { device_id: u32 },
AppleNpu,
Tpu {
device_id: u32,
chip_count: u32,
version: TpuVersion,
},
Gaudi {
device_id: u32,
generation: GaudiGeneration,
},
AwsNeuron {
device_id: u32,
chip_type: NeuronChipType,
core_count: u32,
},
QualcommAi100 { device_id: u32 },
IntelOneApi { device_id: u32 },
CerebrasWse { device_id: u32 },
GraphcoreIpu { device_id: u32 },
GroqLpu { device_id: u32 },
SamsungNpu { device_id: u32 },
MediaTekApu { device_id: u32 },
}
impl AcceleratorType {
#[inline]
pub fn is_gpu(&self) -> bool {
matches!(
self,
Self::CudaGpu { .. }
| Self::RocmGpu { .. }
| Self::MetalGpu
| Self::VulkanGpu { .. }
| Self::IntelOneApi { .. }
)
}
#[inline]
pub fn is_npu(&self) -> bool {
matches!(
self,
Self::IntelNpu
| Self::AppleNpu
| Self::AmdXdnaNpu { .. }
| Self::SamsungNpu { .. }
| Self::MediaTekApu { .. }
)
}
#[inline]
pub fn is_tpu(&self) -> bool {
matches!(self, Self::Tpu { .. })
}
#[inline]
pub fn is_ai_asic(&self) -> bool {
matches!(
self,
Self::Gaudi { .. }
| Self::AwsNeuron { .. }
| Self::QualcommAi100 { .. }
| Self::CerebrasWse { .. }
| Self::GraphcoreIpu { .. }
| Self::GroqLpu { .. }
)
}
#[inline]
pub fn family(&self) -> AcceleratorFamily {
match self {
Self::Cpu => AcceleratorFamily::Cpu,
Self::CudaGpu { .. }
| Self::RocmGpu { .. }
| Self::MetalGpu
| Self::VulkanGpu { .. }
| Self::IntelOneApi { .. } => AcceleratorFamily::Gpu,
Self::IntelNpu
| Self::AppleNpu
| Self::AmdXdnaNpu { .. }
| Self::SamsungNpu { .. }
| Self::MediaTekApu { .. } => AcceleratorFamily::Npu,
Self::Tpu { .. } => AcceleratorFamily::Tpu,
Self::Gaudi { .. }
| Self::AwsNeuron { .. }
| Self::QualcommAi100 { .. }
| Self::CerebrasWse { .. }
| Self::GraphcoreIpu { .. }
| Self::GroqLpu { .. } => AcceleratorFamily::AiAsic,
}
}
#[inline]
pub fn throughput_multiplier(&self) -> f64 {
match self {
Self::Cpu => 1.0,
Self::CudaGpu { .. } => 20.0,
Self::RocmGpu { .. } => 15.0,
Self::MetalGpu => 12.0,
Self::VulkanGpu { .. } => 10.0,
Self::IntelNpu => 8.0,
Self::AppleNpu => 10.0,
Self::AmdXdnaNpu { .. } => 7.0,
Self::Tpu { version, .. } => match version {
TpuVersion::V4 => 25.0,
TpuVersion::V5e => 18.0,
TpuVersion::V5p => 35.0,
},
Self::Gaudi { generation, .. } => match generation {
GaudiGeneration::Gaudi2 => 22.0,
GaudiGeneration::Gaudi3 => 30.0,
},
Self::AwsNeuron {
chip_type,
core_count,
..
} => {
let base = match chip_type {
NeuronChipType::Inferentia => 15.0,
NeuronChipType::Trainium => 24.0,
};
base * (*core_count as f64 / 2.0).max(1.0)
}
Self::QualcommAi100 { .. } => 14.0,
Self::IntelOneApi { .. } => 13.0,
Self::CerebrasWse { .. } => 50.0,
Self::GraphcoreIpu { .. } => 16.0,
Self::GroqLpu { .. } => 30.0,
Self::SamsungNpu { .. } => 6.0,
Self::MediaTekApu { .. } => 5.0,
}
}
#[inline]
pub fn training_multiplier(&self) -> f64 {
match self {
Self::Cpu => 1.0,
Self::CudaGpu { .. } => 20.0,
Self::RocmGpu { .. } => 15.0,
Self::MetalGpu => 10.0,
Self::VulkanGpu { .. } => 6.0, Self::IntelNpu
| Self::AppleNpu
| Self::AmdXdnaNpu { .. }
| Self::SamsungNpu { .. }
| Self::MediaTekApu { .. } => 0.0, Self::Tpu { version, .. } => match version {
TpuVersion::V4 => 28.0,
TpuVersion::V5e => 20.0,
TpuVersion::V5p => 40.0,
},
Self::Gaudi { generation, .. } => match generation {
GaudiGeneration::Gaudi2 => 25.0,
GaudiGeneration::Gaudi3 => 35.0,
},
Self::AwsNeuron {
chip_type,
core_count,
..
} => match chip_type {
NeuronChipType::Inferentia => 0.0, NeuronChipType::Trainium => 26.0 * (*core_count as f64 / 2.0).max(1.0),
},
Self::QualcommAi100 { .. } => 0.0, Self::IntelOneApi { .. } => 10.0, Self::CerebrasWse { .. } => 60.0,
Self::GraphcoreIpu { .. } => 18.0,
Self::GroqLpu { .. } => 0.0, }
}
#[inline]
pub fn supports_training(&self) -> bool {
self.training_multiplier() > 0.0
}
pub(crate) fn rank(&self) -> u32 {
match self {
Self::CerebrasWse { .. } => 85,
Self::Tpu {
version: TpuVersion::V5p,
..
} => 80,
Self::Gaudi {
generation: GaudiGeneration::Gaudi3,
..
} => 75,
Self::Tpu {
version: TpuVersion::V4,
..
} => 70,
Self::Gaudi {
generation: GaudiGeneration::Gaudi2,
..
} => 65,
Self::CudaGpu { .. } => 60,
Self::AwsNeuron {
chip_type: NeuronChipType::Trainium,
..
} => 58,
Self::Tpu {
version: TpuVersion::V5e,
..
} => 55,
Self::GroqLpu { .. } => 55,
Self::RocmGpu { .. } => 50,
Self::GraphcoreIpu { .. } => 48,
Self::AwsNeuron {
chip_type: NeuronChipType::Inferentia,
..
} => 45,
Self::IntelOneApi { .. } => 42,
Self::MetalGpu => 40,
Self::QualcommAi100 { .. } => 38,
Self::VulkanGpu { .. } => 35,
Self::AppleNpu => 30,
Self::AmdXdnaNpu { .. } => 25,
Self::IntelNpu => 20,
Self::SamsungNpu { .. } => 18,
Self::MediaTekApu { .. } => 15,
Self::Cpu => 10,
}
}
}
impl fmt::Display for AcceleratorType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Cpu => write!(f, "CPU"),
Self::CudaGpu { device_id } => write!(f, "CUDA GPU (device {})", device_id),
Self::RocmGpu { device_id } => write!(f, "ROCm GPU (device {})", device_id),
Self::MetalGpu => write!(f, "Metal GPU"),
Self::VulkanGpu { device_id } => write!(f, "Vulkan GPU (device {})", device_id),
Self::IntelNpu => write!(f, "Intel NPU"),
Self::AmdXdnaNpu { device_id } => write!(f, "AMD XDNA NPU (device {})", device_id),
Self::AppleNpu => write!(f, "Apple Neural Engine"),
Self::Tpu {
device_id,
chip_count,
version,
} => {
write!(
f,
"TPU {} (device {}, {} chips)",
version, device_id, chip_count
)
}
Self::Gaudi {
device_id,
generation,
} => {
write!(f, "Intel {} (device {})", generation, device_id)
}
Self::AwsNeuron {
device_id,
chip_type,
core_count,
} => {
write!(
f,
"AWS {} (device {}, {} cores)",
chip_type, device_id, core_count
)
}
Self::QualcommAi100 { device_id } => {
write!(f, "Qualcomm Cloud AI 100 (device {})", device_id)
}
Self::IntelOneApi { device_id } => {
write!(f, "Intel oneAPI GPU (device {})", device_id)
}
Self::CerebrasWse { device_id } => {
write!(f, "Cerebras WSE (device {})", device_id)
}
Self::GraphcoreIpu { device_id } => {
write!(f, "Graphcore IPU (device {})", device_id)
}
Self::GroqLpu { device_id } => {
write!(f, "Groq LPU (device {})", device_id)
}
Self::SamsungNpu { device_id } => {
write!(f, "Samsung NPU (device {})", device_id)
}
Self::MediaTekApu { device_id } => {
write!(f, "MediaTek APU (device {})", device_id)
}
}
}
}