#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
/// Manufacturer of a GPU device.
///
/// Vendors without a dedicated variant are carried verbatim in
/// [`GpuVendor::Unknown`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum GpuVendor {
    /// Displayed as `NVIDIA`.
    Nvidia,
    /// Displayed as `AMD`.
    Amd,
    /// Displayed as `Intel`.
    Intel,
    /// Any other vendor; the payload is displayed unchanged.
    Unknown(String),
}

// Human-readable vendor name. The text is written verbatim, so width and
// alignment flags on the formatter are not applied.
impl std::fmt::Display for GpuVendor {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &str = match self {
            Self::Nvidia => "NVIDIA",
            Self::Amd => "AMD",
            Self::Intel => "Intel",
            Self::Unknown(other) => other.as_str(),
        };
        f.write_str(label)
    }
}
/// Accelerator models recognised by this module.
///
/// Names that [`GpuModel::from_name`] cannot classify are preserved verbatim
/// in [`GpuModel::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum GpuModel {
    NvidiaA100,
    NvidiaH100,
    NvidiaL40S,
    NvidiaT4,
    NvidiaV100,
    AmdMI250X,
    AmdMI300X,
    AmdMI210,
    IntelMax1550,
    /// Unrecognised model; holds the original, unmodified device name.
    Other(String),
}

impl GpuModel {
    /// Classifies a free-form device name.
    ///
    /// Matching is case-insensitive and looks for a model marker anywhere in
    /// `name`. Checks run in a fixed order (A100, H100, L40, T4, V100, MI250,
    /// MI300, MI210, Max 1550) and the first hit wins.
    ///
    /// A marker only counts when it is *not* immediately followed by another
    /// ASCII digit. This keeps `"RTX A1000"` from being reported as an A100
    /// and `"Quadro T400"` from being reported as a T4, while still accepting
    /// suffixed names such as `"A100-SXM4-80GB"`, `"V100S"` or `"MI250X"`.
    ///
    /// Notes on individual rules:
    /// * any `L40` marker maps to [`GpuModel::NvidiaL40S`];
    /// * a `T4` marker is ignored when the name also contains `RTX`;
    /// * the Intel rule requires both `MAX` and `1550`.
    ///
    /// Returns [`GpuModel::Other`] with `name` copied unchanged when nothing
    /// matches.
    pub fn from_name(name: &str) -> Self {
        let upper = name.to_uppercase();
        let has = |token: &str| Self::has_model_token(&upper, token);

        if has("A100") {
            GpuModel::NvidiaA100
        } else if has("H100") {
            GpuModel::NvidiaH100
        } else if has("L40") {
            GpuModel::NvidiaL40S
        } else if has("T4") && !upper.contains("RTX") {
            GpuModel::NvidiaT4
        } else if has("V100") {
            GpuModel::NvidiaV100
        } else if has("MI250") {
            GpuModel::AmdMI250X
        } else if has("MI300") {
            GpuModel::AmdMI300X
        } else if has("MI210") {
            GpuModel::AmdMI210
        } else if upper.contains("MAX") && has("1550") {
            GpuModel::IntelMax1550
        } else {
            GpuModel::Other(name.to_string())
        }
    }

    /// Returns `true` when `token` occurs in `haystack` at least once without
    /// an ASCII digit directly after it.
    fn has_model_token(haystack: &str, token: &str) -> bool {
        haystack.match_indices(token).any(|(start, matched)| {
            let end = start + matched.len();
            // Looking at a single byte is safe: ASCII digits never occur
            // inside a multi-byte UTF-8 sequence.
            !matches!(haystack.as_bytes().get(end), Some(next) if next.is_ascii_digit())
        })
    }
}

// Marketing-style model name. The text is written verbatim, so width and
// alignment flags on the formatter are not applied.
impl std::fmt::Display for GpuModel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &str = match self {
            GpuModel::NvidiaA100 => "NVIDIA A100",
            GpuModel::NvidiaH100 => "NVIDIA H100",
            GpuModel::NvidiaL40S => "NVIDIA L40S",
            GpuModel::NvidiaT4 => "NVIDIA T4",
            GpuModel::NvidiaV100 => "NVIDIA V100",
            GpuModel::AmdMI250X => "AMD Instinct MI250X",
            GpuModel::AmdMI300X => "AMD Instinct MI300X",
            GpuModel::AmdMI210 => "AMD Instinct MI210",
            GpuModel::IntelMax1550 => "Intel Data Center GPU Max 1550",
            GpuModel::Other(name) => name.as_str(),
        };
        f.write_str(label)
    }
}
/// Connection settings for a Nutanix API endpoint.
///
/// `Debug` is implemented by hand so that `api_key` and `password` are never
/// printed in logs or panic messages; every other field is shown as usual.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct NutanixConfig {
    /// Endpoint base, e.g. `https://prism.example.com:9440`. Trailing slashes
    /// are ignored by [`NutanixConfig::api_url`].
    pub base_url: String,
    /// API key credential; left empty by [`NutanixConfig::with_basic_auth`].
    pub api_key: String,
    /// Optional user name; `None` when absent from serde input.
    #[cfg_attr(feature = "serde", serde(default))]
    pub username: Option<String>,
    /// Optional password; `None` when absent from serde input.
    #[cfg_attr(feature = "serde", serde(default))]
    pub password: Option<String>,
    /// Timeout value; (de)serialized through `duration_serde`, 30 seconds by
    /// default.
    #[cfg_attr(feature = "serde", serde(with = "duration_serde", default = "default_timeout"))]
    pub timeout: Duration,
    /// TLS verification flag; `true` by default, cleared by
    /// [`NutanixConfig::with_insecure_ssl`].
    #[cfg_attr(feature = "serde", serde(default = "default_true"))]
    pub verify_ssl: bool,
    /// Version segment inserted into URLs by [`NutanixConfig::api_url`];
    /// `"v3"` by default.
    #[cfg_attr(feature = "serde", serde(default = "default_api_version"))]
    pub api_version: String,
}

// Redacting `Debug`: shows whether a secret is set without revealing it.
impl std::fmt::Debug for NutanixConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const REDACTED: &str = "<redacted>";
        let api_key = if self.api_key.is_empty() { "" } else { REDACTED };
        let password = self.password.as_ref().map(|_| REDACTED);
        f.debug_struct("NutanixConfig")
            .field("base_url", &self.base_url)
            .field("api_key", &api_key)
            .field("username", &self.username)
            .field("password", &password)
            .field("timeout", &self.timeout)
            .field("verify_ssl", &self.verify_ssl)
            .field("api_version", &self.api_version)
            .finish()
    }
}

/// Default for [`NutanixConfig::timeout`]: 30 seconds.
fn default_timeout() -> Duration {
    Duration::from_secs(30)
}

/// Default for [`NutanixConfig::verify_ssl`]: verification enabled.
fn default_true() -> bool {
    true
}

/// Default for [`NutanixConfig::api_version`]: `"v3"`.
fn default_api_version() -> String {
    "v3".to_string()
}

impl Default for NutanixConfig {
    /// Empty URL and credentials, combined with the same per-field defaults
    /// that serde deserialization uses.
    fn default() -> Self {
        Self {
            base_url: String::new(),
            api_key: String::new(),
            username: None,
            password: None,
            timeout: default_timeout(),
            verify_ssl: default_true(),
            api_version: default_api_version(),
        }
    }
}

impl NutanixConfig {
    /// Creates a configuration that carries an API key; all remaining fields
    /// take their defaults.
    pub fn new(base_url: impl Into<String>, api_key: impl Into<String>) -> Self {
        Self {
            base_url: base_url.into(),
            api_key: api_key.into(),
            ..Default::default()
        }
    }

    /// Creates a configuration that carries a user name and password; the
    /// API key stays empty and all remaining fields take their defaults.
    pub fn with_basic_auth(
        base_url: impl Into<String>,
        username: impl Into<String>,
        password: impl Into<String>,
    ) -> Self {
        Self {
            base_url: base_url.into(),
            username: Some(username.into()),
            password: Some(password.into()),
            ..Default::default()
        }
    }

    /// Returns the configuration with `timeout` replaced.
    pub fn with_timeout(mut self, timeout: Duration) -> Self {
        self.timeout = timeout;
        self
    }

    /// Returns the configuration with `verify_ssl` set to `false`.
    pub fn with_insecure_ssl(mut self) -> Self {
        self.verify_ssl = false;
        self
    }

    /// Builds `<base_url>/api/nutanix/<api_version>/<path>`.
    ///
    /// Slashes at the joints (end of `base_url`, either side of
    /// `api_version`, start of `path`) are stripped so the result never
    /// contains a doubled separator there.
    pub fn api_url(&self, path: &str) -> String {
        format!(
            "{}/api/nutanix/{}/{}",
            self.base_url.trim_end_matches('/'),
            self.api_version.trim_matches('/'),
            path.trim_start_matches('/'),
        )
    }
}
/// Description of a single GPU device.
///
/// NOTE(review): this file only declares the record; field meanings below are
/// inferred from names and from the fixture in the test module — confirm
/// against whatever produces these values.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GpuInfo {
/// Vendor of the device.
pub vendor: GpuVendor,
/// Model of the device.
pub model: GpuModel,
/// Device identifier string (format not established in this file).
pub device_id: String,
/// Device memory size; presumably in bytes, per the field name — TODO confirm.
pub memory_bytes: u64,
/// Compute-unit count; exact meaning per vendor is not established here.
pub compute_units: u32,
/// Assignment flag; presumably pairs with `assigned_vm` — TODO confirm.
pub assigned: bool,
/// Optional VM reference; `None` when the key is absent from serde input.
#[cfg_attr(feature = "serde", serde(default))]
pub assigned_vm: Option<String>,
/// Mode string; falls back to `default_gpu_mode()` when absent from serde input.
#[cfg_attr(feature = "serde", serde(default = "default_gpu_mode"))]
pub mode: String,
/// Optional NUMA node number; `None` when the key is absent from serde input.
#[cfg_attr(feature = "serde", serde(default))]
pub numa_node: Option<u32>,
}
/// Serde fallback for the `mode` field of `GpuInfo`: the literal
/// `"passthrough"`.
fn default_gpu_mode() -> String {
    String::from("passthrough")
}
/// Hardware and software capabilities recorded for one host.
///
/// NOTE(review): this file only declares the record; nothing here keeps the
/// boolean flags consistent with `gpus` or `cpu_arch` — confirm who populates
/// them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct HostCapabilities {
/// Host identifier string.
pub host_id: String,
/// Host name string.
pub host_name: String,
/// CPU architecture string (the test fixture in this file uses `"x86_64"`).
pub cpu_arch: String,
/// CPU core count.
pub cpu_cores: u32,
/// RAM size; presumably in bytes, per the field name — TODO confirm.
pub ram_bytes: u64,
/// NVIDIA-presence flag; presumably mirrors `gpus` — TODO confirm.
pub has_nvidia: bool,
/// AMD-presence flag; presumably mirrors `gpus` — TODO confirm.
pub has_amd: bool,
/// ARM flag; presumably mirrors `cpu_arch` — TODO confirm.
pub is_arm: bool,
/// GPU records attached to this capability report.
pub gpus: Vec<GpuInfo>,
/// Hypervisor name string (the test fixture in this file uses `"AHV"`).
pub hypervisor: String,
/// AOS version string (the test fixture in this file uses `"6.7"`).
pub aos_version: String,
/// Flag for GPU passthrough support.
pub gpu_passthrough_supported: bool,
/// Flag for vGPU support.
pub vgpu_supported: bool,
/// Free-form key/value pairs; empty when the key is absent from serde input.
#[cfg_attr(feature = "serde", serde(default))]
pub metadata: HashMap<String, String>,
}
/// A host together with its GPU inventory and cluster membership.
///
/// The helper methods on this type read `available_gpus` only.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GpuNode {
/// Host identifier string.
pub host_id: String,
/// Host name string.
pub host_name: String,
/// Identifier string of the owning cluster.
pub cluster_id: String,
/// Name string of the owning cluster.
pub cluster_name: String,
/// IP address kept as a plain string; it is not parsed or validated here.
pub ip_address: String,
/// GPUs counted by `available_gpu_count`, `available_gpu_memory` and
/// `has_available_gpus`.
pub available_gpus: Vec<GpuInfo>,
/// Presumably every GPU on the host, available or not — TODO confirm.
pub total_gpus: Vec<GpuInfo>,
/// Capability record stored for this host.
pub capabilities: HostCapabilities,
}
impl GpuNode {
    /// Number of entries in `available_gpus` whose vendor equals `vendor`.
    pub fn available_gpu_count(&self, vendor: &GpuVendor) -> usize {
        let mut matching = 0usize;
        for gpu in &self.available_gpus {
            if gpu.vendor == *vendor {
                matching += 1;
            }
        }
        matching
    }

    /// Sum of `memory_bytes` over every entry in `available_gpus`,
    /// regardless of vendor.
    pub fn available_gpu_memory(&self) -> u64 {
        let mut total: u64 = 0;
        for gpu in &self.available_gpus {
            total += gpu.memory_bytes;
        }
        total
    }

    /// Whether at least `count` available GPUs of `vendor` exist.
    ///
    /// A `count` of zero is always satisfied.
    pub fn has_available_gpus(&self, vendor: &GpuVendor, count: usize) -> bool {
        let matching = self.available_gpu_count(vendor);
        count <= matching
    }
}
/// Aggregated GPU figures for one cluster.
///
/// NOTE(review): nothing in this file keeps the counters in sync with
/// `nodes`; they are stored exactly as provided — confirm who computes them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GpuClusterSummary {
/// Cluster identifier string.
pub cluster_id: String,
/// Cluster name string.
pub cluster_name: String,
/// Count of GPU hosts, as provided.
pub gpu_host_count: u32,
/// Count of GPUs in total, as provided.
pub total_gpu_count: u32,
/// Count of available GPUs, as provided.
pub available_gpu_count: u32,
/// GPU counts keyed by a vendor string; read by `dominant_vendor` and
/// `is_multi_vendor`. Presumably keyed by vendor display name — TODO confirm.
pub gpus_by_vendor: HashMap<String, u32>,
/// GPU counts keyed by a model string; presumably the model display name —
/// TODO confirm.
pub gpus_by_model: HashMap<String, u32>,
/// Total GPU memory; presumably in bytes, per the field name — TODO confirm.
pub total_gpu_memory_bytes: u64,
/// Available GPU memory; presumably in bytes, per the field name — TODO confirm.
pub available_gpu_memory_bytes: u64,
/// Node records attached to this summary.
pub nodes: Vec<GpuNode>,
}
impl GpuClusterSummary {
    /// Returns the key of `gpus_by_vendor` with the highest count, or `None`
    /// when the map is empty.
    ///
    /// When several vendors share the highest count, the lexicographically
    /// smallest key is returned. Without this tie-break the answer would
    /// follow `HashMap` iteration order, which is arbitrary and can differ
    /// from one map instance to the next.
    ///
    /// Entries with a count of zero still take part in the comparison.
    pub fn dominant_vendor(&self) -> Option<String> {
        self.gpus_by_vendor
            .iter()
            .max_by(|(name_a, count_a), (name_b, count_b)| {
                // Primary key: larger count. Secondary key: the names are
                // compared in reverse, so the smaller name ranks as "max".
                count_a.cmp(count_b).then_with(|| name_b.cmp(name_a))
            })
            .map(|(vendor, _)| vendor.clone())
    }

    /// Whether `gpus_by_vendor` has more than one key.
    ///
    /// Only the number of keys is checked; the counts are not inspected.
    pub fn is_multi_vendor(&self) -> bool {
        self.gpus_by_vendor.len() > 1
    }
}
/// Settings describing one workload deployment.
///
/// Fields carrying `serde(default = "...")` fall back to the named helper
/// function when the key is missing from the input; plain `serde(default)`
/// falls back to the type's `Default` value.
///
/// NOTE(review): the field names suggest a Kubernetes-style deployment
/// (namespace, replicas, HPA); this file only stores the values and performs
/// no validation on them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DeploymentConfig {
/// Deployment name.
pub name: String,
/// Namespace; serde fallback `"cuda-wasm"`.
#[cfg_attr(feature = "serde", serde(default = "default_namespace"))]
pub namespace: String,
/// Image reference string.
pub image: String,
/// Replica count; serde fallback `1`.
#[cfg_attr(feature = "serde", serde(default = "default_replicas"))]
pub replicas: u32,
/// GPU vendor selected for the deployment.
pub gpu_vendor: GpuVendor,
/// GPUs per pod; serde fallback `1`.
#[cfg_attr(feature = "serde", serde(default = "default_gpu_count"))]
pub gpus_per_pod: u32,
/// CPU request as a quantity string; serde fallback `"1000m"`.
#[cfg_attr(feature = "serde", serde(default = "default_cpu_request"))]
pub cpu_request: String,
/// CPU limit as a quantity string; serde fallback `"4000m"`.
#[cfg_attr(feature = "serde", serde(default = "default_cpu_limit"))]
pub cpu_limit: String,
/// Memory request as a quantity string; serde fallback `"4Gi"`.
#[cfg_attr(feature = "serde", serde(default = "default_mem_request"))]
pub memory_request: String,
/// Memory limit as a quantity string; serde fallback `"16Gi"`.
#[cfg_attr(feature = "serde", serde(default = "default_mem_limit"))]
pub memory_limit: String,
/// Kernel cache size as a quantity string; serde fallback `"10Gi"`.
#[cfg_attr(feature = "serde", serde(default = "default_cache_size"))]
pub kernel_cache_size: String,
/// Storage class name; serde fallback `"nutanix-volume"`.
#[cfg_attr(feature = "serde", serde(default = "default_storage_class"))]
pub storage_class: String,
/// Service port; serde fallback `8080`.
#[cfg_attr(feature = "serde", serde(default = "default_service_port"))]
pub service_port: u16,
/// HPA switch; `false` when absent from serde input, set by `with_hpa`.
#[cfg_attr(feature = "serde", serde(default))]
pub enable_hpa: bool,
/// HPA lower replica bound; serde fallback `1`.
#[cfg_attr(feature = "serde", serde(default = "default_hpa_min"))]
pub hpa_min_replicas: u32,
/// HPA upper replica bound; serde fallback `8`.
#[cfg_attr(feature = "serde", serde(default = "default_hpa_max"))]
pub hpa_max_replicas: u32,
/// HPA GPU-utilization target; serde fallback `70` (presumably a
/// percentage — TODO confirm).
#[cfg_attr(feature = "serde", serde(default = "default_hpa_target"))]
pub hpa_target_gpu_utilization: u32,
/// Environment variables as key/value pairs; empty when absent.
#[cfg_attr(feature = "serde", serde(default))]
pub env_vars: HashMap<String, String>,
/// Labels as key/value pairs; empty when absent.
#[cfg_attr(feature = "serde", serde(default))]
pub labels: HashMap<String, String>,
/// Annotations as key/value pairs; empty when absent. Extended by
/// `with_nke_annotation`.
#[cfg_attr(feature = "serde", serde(default))]
pub annotations: HashMap<String, String>,
}
// Per-field fallbacks for `DeploymentConfig`. Each function is referenced by
// name from a `serde(default = "...")` attribute and reused by the struct's
// `Default` implementation, so names and return values must stay in sync.

/// Fallback namespace: `"cuda-wasm"`.
fn default_namespace() -> String {
    String::from("cuda-wasm")
}

/// Fallback replica count: `1`.
fn default_replicas() -> u32 {
    1_u32
}

/// Fallback number of GPUs per pod: `1`.
fn default_gpu_count() -> u32 {
    1_u32
}

/// Fallback CPU request: `"1000m"`.
fn default_cpu_request() -> String {
    String::from("1000m")
}

/// Fallback CPU limit: `"4000m"`.
fn default_cpu_limit() -> String {
    String::from("4000m")
}

/// Fallback memory request: `"4Gi"`.
fn default_mem_request() -> String {
    String::from("4Gi")
}

/// Fallback memory limit: `"16Gi"`.
fn default_mem_limit() -> String {
    String::from("16Gi")
}

/// Fallback kernel cache size: `"10Gi"`.
fn default_cache_size() -> String {
    String::from("10Gi")
}

/// Fallback storage class: `"nutanix-volume"`.
fn default_storage_class() -> String {
    String::from("nutanix-volume")
}

/// Fallback service port: `8080`.
fn default_service_port() -> u16 {
    8080_u16
}

/// Fallback HPA lower replica bound: `1`.
fn default_hpa_min() -> u32 {
    1_u32
}

/// Fallback HPA upper replica bound: `8`.
fn default_hpa_max() -> u32 {
    8_u32
}

/// Fallback HPA GPU-utilization target: `70`.
fn default_hpa_target() -> u32 {
    70_u32
}
impl Default for DeploymentConfig {
    /// Baseline deployment: name `"cuda-wasm-worker"`, image
    /// `"cuda-wasm:latest"`, NVIDIA GPUs, HPA disabled, empty maps, and the
    /// per-field serde fallbacks for everything else.
    fn default() -> Self {
        Self {
            // Identity of the workload.
            name: String::from("cuda-wasm-worker"),
            namespace: default_namespace(),
            image: String::from("cuda-wasm:latest"),
            replicas: default_replicas(),
            // GPU selection.
            gpu_vendor: GpuVendor::Nvidia,
            gpus_per_pod: default_gpu_count(),
            // Resource quantities.
            cpu_request: default_cpu_request(),
            cpu_limit: default_cpu_limit(),
            memory_request: default_mem_request(),
            memory_limit: default_mem_limit(),
            kernel_cache_size: default_cache_size(),
            storage_class: default_storage_class(),
            service_port: default_service_port(),
            // Autoscaling stays off until explicitly enabled.
            enable_hpa: false,
            hpa_min_replicas: default_hpa_min(),
            hpa_max_replicas: default_hpa_max(),
            hpa_target_gpu_utilization: default_hpa_target(),
            // Free-form maps start out empty.
            env_vars: HashMap::default(),
            labels: HashMap::default(),
            annotations: HashMap::default(),
        }
    }
}
impl DeploymentConfig {
    /// Creates a configuration with the given name and image; every other
    /// field comes from `Default`.
    pub fn new(name: impl Into<String>, image: impl Into<String>) -> Self {
        let (name, image) = (name.into(), image.into());
        Self {
            name,
            image,
            ..Self::default()
        }
    }

    /// Returns the configuration with `gpu_vendor` replaced.
    pub fn with_gpu_vendor(self, vendor: GpuVendor) -> Self {
        Self {
            gpu_vendor: vendor,
            ..self
        }
    }

    /// Returns the configuration with `gpus_per_pod` replaced.
    pub fn with_gpus(self, count: u32) -> Self {
        Self {
            gpus_per_pod: count,
            ..self
        }
    }

    /// Returns the configuration with HPA switched on and its three
    /// parameters replaced. The values are stored as given; no range or
    /// ordering check is performed.
    pub fn with_hpa(self, min: u32, max: u32, target_utilization: u32) -> Self {
        Self {
            enable_hpa: true,
            hpa_min_replicas: min,
            hpa_max_replicas: max,
            hpa_target_gpu_utilization: target_utilization,
            ..self
        }
    }

    /// Returns the configuration with one annotation inserted; an existing
    /// annotation under the same key is overwritten.
    pub fn with_nke_annotation(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        let (key, value) = (key.into(), value.into());
        self.annotations.insert(key, value);
        self
    }
}
/// Serde adapter that stores a `Duration` as an unsigned number of whole
/// seconds.
///
/// Only `Duration::as_secs` is written, so any sub-second part is dropped on
/// serialization and is absent after a round trip.
#[cfg(feature = "serde")]
mod duration_serde {
    use serde::{Deserialize, Deserializer, Serializer};
    use std::time::Duration;

    /// Writes the whole-second part of `duration` as a `u64`.
    pub fn serialize<S: Serializer>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error> {
        let whole_seconds: u64 = duration.as_secs();
        serializer.serialize_u64(whole_seconds)
    }

    /// Reads a `u64` and interprets it as a number of seconds.
    pub fn deserialize<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Duration, D::Error> {
        u64::deserialize(deserializer).map(Duration::from_secs)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One gibibyte; keeps the fixture sizes readable.
    const GIB: u64 = 1024 * 1024 * 1024;
    /// Endpoint used by the `NutanixConfig` tests.
    const PRISM_URL: &str = "https://prism.example.com:9440";

    /// An unassigned passthrough A100 with 80 GiB of memory.
    fn unassigned_a100(device_id: &str, numa_node: u32) -> GpuInfo {
        GpuInfo {
            vendor: GpuVendor::Nvidia,
            model: GpuModel::NvidiaA100,
            device_id: device_id.into(),
            memory_bytes: 80 * GIB,
            compute_units: 108,
            assigned: false,
            assigned_vm: None,
            mode: "passthrough".into(),
            numa_node: Some(numa_node),
        }
    }

    /// A host with two available A100s and an empty `total_gpus` list.
    fn two_gpu_node() -> GpuNode {
        let capabilities = HostCapabilities {
            host_id: "host-1".into(),
            host_name: "gpu-host-01".into(),
            cpu_arch: "x86_64".into(),
            cpu_cores: 64,
            ram_bytes: 512 * GIB,
            has_nvidia: true,
            has_amd: false,
            is_arm: false,
            gpus: vec![],
            hypervisor: "AHV".into(),
            aos_version: "6.7".into(),
            gpu_passthrough_supported: true,
            vgpu_supported: true,
            metadata: HashMap::new(),
        };
        GpuNode {
            host_id: "host-1".to_string(),
            host_name: "gpu-host-01".to_string(),
            cluster_id: "cluster-1".to_string(),
            cluster_name: "GPU Cluster".to_string(),
            ip_address: "10.0.0.1".to_string(),
            available_gpus: vec![unassigned_a100("gpu-0", 0), unassigned_a100("gpu-1", 1)],
            total_gpus: vec![],
            capabilities,
        }
    }

    #[test]
    fn test_nutanix_config_new() {
        let config = NutanixConfig::new(PRISM_URL, "my-api-key");

        assert_eq!(config.base_url, "https://prism.example.com:9440");
        assert_eq!(config.api_key, "my-api-key");
        assert_eq!(config.timeout, Duration::from_secs(30));
        assert!(config.verify_ssl);
    }

    #[test]
    fn test_nutanix_config_api_url() {
        let url = NutanixConfig::new(PRISM_URL, "key").api_url("hosts/list");

        assert_eq!(url, "https://prism.example.com:9440/api/nutanix/v3/hosts/list");
    }

    #[test]
    fn test_deployment_config_default() {
        let DeploymentConfig {
            namespace,
            replicas,
            gpus_per_pod,
            ..
        } = DeploymentConfig::default();

        assert_eq!(namespace, "cuda-wasm");
        assert_eq!(replicas, 1);
        assert_eq!(gpus_per_pod, 1);
    }

    #[test]
    fn test_deployment_config_builder() {
        let config = DeploymentConfig::new("my-workload", "my-image:v1")
            .with_gpu_vendor(GpuVendor::Amd)
            .with_gpus(2)
            .with_hpa(1, 4, 80);

        assert_eq!(config.name, "my-workload");
        assert_eq!(config.gpu_vendor, GpuVendor::Amd);
        assert_eq!(config.gpus_per_pod, 2);
        assert!(config.enable_hpa);
        assert_eq!(config.hpa_max_replicas, 4);
    }

    #[test]
    fn test_gpu_vendor_display() {
        let cases = [
            (GpuVendor::Nvidia, "NVIDIA"),
            (GpuVendor::Amd, "AMD"),
            (GpuVendor::Unknown("Custom".into()), "Custom"),
        ];
        for (vendor, expected) in cases {
            assert_eq!(vendor.to_string(), expected);
        }
    }

    #[test]
    fn test_gpu_node_helpers() {
        let node = two_gpu_node();

        assert_eq!(node.available_gpu_count(&GpuVendor::Nvidia), 2);
        assert_eq!(node.available_gpu_count(&GpuVendor::Amd), 0);
        assert!(node.has_available_gpus(&GpuVendor::Nvidia, 2));
        assert!(!node.has_available_gpus(&GpuVendor::Nvidia, 3));
        assert_eq!(node.available_gpu_memory(), 2 * 80 * GIB);
    }
}