Skip to main content

cuda_rust_wasm/nutanix/
deployment.rs

//! Kubernetes / NKE deployment manifest generation for cuda-wasm workloads
//!
//! Generates complete Kubernetes YAML manifests for deploying cuda-wasm GPU workloads
//! on Nutanix Kubernetes Engine (NKE) clusters, including:
//!
//! - Deployments with GPU resource requests (NVIDIA, AMD, Intel)
//! - Node affinity rules for GPU vendor selection
//! - ConfigMaps for cuda-wasm runtime configuration
//! - PersistentVolumeClaims for kernel cache (Nutanix CSI)
//! - Services and HorizontalPodAutoscalers
//! - NKE-specific annotations and labels
12
13use super::config::*;
14use std::collections::HashMap;
15
16/// Generates Kubernetes deployment manifests for cuda-wasm workloads
17pub struct DeploymentGenerator {
18    config: DeploymentConfig,
19}
20
21impl DeploymentGenerator {
22    /// Create a new DeploymentGenerator from deployment configuration
23    pub fn new(config: DeploymentConfig) -> Self {
24        Self { config }
25    }
26
27    /// Generate a complete set of Kubernetes manifests as a single multi-document YAML string
28    ///
29    /// The output includes (separated by `---`):
30    /// 1. Namespace
31    /// 2. ConfigMap for runtime settings
32    /// 3. PersistentVolumeClaim for kernel cache
33    /// 4. Deployment with GPU resource requests
34    /// 5. Service
35    /// 6. HorizontalPodAutoscaler (if enabled)
36    pub fn generate_all(&self) -> String {
37        let mut manifests = vec![
38            self.generate_namespace(),
39            self.generate_configmap(),
40            self.generate_pvc(),
41            self.generate_deployment(),
42            self.generate_service(),
43        ];
44
45        if self.config.enable_hpa {
46            manifests.push(self.generate_hpa());
47        }
48
49        manifests.join("\n---\n")
50    }
51
52    /// Generate the Namespace manifest
53    pub fn generate_namespace(&self) -> String {
54        format!(
55            r#"apiVersion: v1
56kind: Namespace
57metadata:
58  name: {namespace}
59  labels:
60    app.kubernetes.io/part-of: cuda-wasm
61    platform: nutanix-nke"#,
62            namespace = self.config.namespace
63        )
64    }
65
66    /// Generate the ConfigMap for cuda-wasm runtime settings
67    pub fn generate_configmap(&self) -> String {
68        let mut env_entries = String::new();
69        for (key, value) in &self.config.env_vars {
70            env_entries.push_str(&format!("    {}={}\n", key, value));
71        }
72
73        let gpu_backend = match &self.config.gpu_vendor {
74            GpuVendor::Nvidia => "cuda",
75            GpuVendor::Amd => "rocm",
76            GpuVendor::Intel => "oneapi",
77            GpuVendor::Unknown(_) => "webgpu",
78        };
79
80        format!(
81            r#"apiVersion: v1
82kind: ConfigMap
83metadata:
84  name: {name}-config
85  namespace: {namespace}
86  labels:
87    app.kubernetes.io/name: {name}
88    app.kubernetes.io/component: config
89data:
90  CUDA_WASM_GPU_BACKEND: "{gpu_backend}"
91  CUDA_WASM_GPU_COUNT: "{gpu_count}"
92  CUDA_WASM_KERNEL_CACHE_DIR: "/cache/kernels"
93  CUDA_WASM_LOG_LEVEL: "info"
94  CUDA_WASM_WEBGPU_ENABLED: "true"
95  CUDA_WASM_MEMORY_POOL_SIZE: "2147483648"
96  CUDA_WASM_MAX_CONCURRENT_KERNELS: "16"
97{env_entries}"#,
98            name = self.config.name,
99            namespace = self.config.namespace,
100            gpu_backend = gpu_backend,
101            gpu_count = self.config.gpus_per_pod,
102            env_entries = if env_entries.is_empty() {
103                String::new()
104            } else {
105                format!("  # Custom environment variables\n{}", env_entries)
106            }
107        )
108    }
109
110    /// Generate the PersistentVolumeClaim for kernel cache storage (Nutanix CSI)
111    pub fn generate_pvc(&self) -> String {
112        format!(
113            r#"apiVersion: v1
114kind: PersistentVolumeClaim
115metadata:
116  name: {name}-kernel-cache
117  namespace: {namespace}
118  labels:
119    app.kubernetes.io/name: {name}
120    app.kubernetes.io/component: cache
121  annotations:
122    # Nutanix CSI volume annotations
123    csi.nutanix.com/storage-type: "NutanixVolumes"
124spec:
125  accessModes:
126    - ReadWriteOnce
127  storageClassName: {storage_class}
128  resources:
129    requests:
130      storage: {cache_size}"#,
131            name = self.config.name,
132            namespace = self.config.namespace,
133            storage_class = self.config.storage_class,
134            cache_size = self.config.kernel_cache_size
135        )
136    }
137
138    /// Generate the Deployment manifest with GPU resource requests and node affinity
139    pub fn generate_deployment(&self) -> String {
140        let gpu_resource = gpu_resource_key(&self.config.gpu_vendor);
141        let labels = self.merge_labels();
142        let annotations = self.merge_annotations();
143
144        let labels_yaml = format_yaml_map(&labels, 8);
145        let annotations_yaml = format_yaml_map(&annotations, 8);
146        let selector_labels = format!(
147            "app.kubernetes.io/name: {}\n        app.kubernetes.io/instance: {}",
148            self.config.name, self.config.name
149        );
150        let pod_labels_yaml = format_yaml_map(&labels, 12);
151
152        let node_affinity = self.generate_node_affinity();
153        let tolerations = self.generate_tolerations();
154
155        format!(
156            r#"apiVersion: apps/v1
157kind: Deployment
158metadata:
159  name: {name}
160  namespace: {namespace}
161  labels:
162{labels_yaml}
163  annotations:
164{annotations_yaml}
165spec:
166  replicas: {replicas}
167  selector:
168    matchLabels:
169      {selector_labels}
170  template:
171    metadata:
172      labels:
173{pod_labels_yaml}
174    spec:
175{node_affinity}
176{tolerations}
177      containers:
178        - name: cuda-wasm-worker
179          image: {image}
180          ports:
181            - containerPort: {port}
182              name: http
183              protocol: TCP
184          envFrom:
185            - configMapRef:
186                name: {name}-config
187          resources:
188            requests:
189              cpu: "{cpu_request}"
190              memory: "{mem_request}"
191              {gpu_resource}: "{gpu_count}"
192            limits:
193              cpu: "{cpu_limit}"
194              memory: "{mem_limit}"
195              {gpu_resource}: "{gpu_count}"
196          volumeMounts:
197            - name: kernel-cache
198              mountPath: /cache/kernels
199            - name: dshm
200              mountPath: /dev/shm
201          livenessProbe:
202            httpGet:
203              path: /healthz
204              port: http
205            initialDelaySeconds: 30
206            periodSeconds: 10
207          readinessProbe:
208            httpGet:
209              path: /readyz
210              port: http
211            initialDelaySeconds: 10
212            periodSeconds: 5
213      volumes:
214        - name: kernel-cache
215          persistentVolumeClaim:
216            claimName: {name}-kernel-cache
217        - name: dshm
218          emptyDir:
219            medium: Memory
220            sizeLimit: 8Gi"#,
221            name = self.config.name,
222            namespace = self.config.namespace,
223            replicas = self.config.replicas,
224            image = self.config.image,
225            port = self.config.service_port,
226            cpu_request = self.config.cpu_request,
227            cpu_limit = self.config.cpu_limit,
228            mem_request = self.config.memory_request,
229            mem_limit = self.config.memory_limit,
230            gpu_resource = gpu_resource,
231            gpu_count = self.config.gpus_per_pod,
232            labels_yaml = labels_yaml,
233            annotations_yaml = annotations_yaml,
234            selector_labels = selector_labels,
235            pod_labels_yaml = pod_labels_yaml,
236            node_affinity = node_affinity,
237            tolerations = tolerations,
238        )
239    }
240
241    /// Generate the Service manifest
242    pub fn generate_service(&self) -> String {
243        format!(
244            r#"apiVersion: v1
245kind: Service
246metadata:
247  name: {name}
248  namespace: {namespace}
249  labels:
250    app.kubernetes.io/name: {name}
251    app.kubernetes.io/component: api
252spec:
253  type: ClusterIP
254  ports:
255    - port: {port}
256      targetPort: http
257      protocol: TCP
258      name: http
259  selector:
260    app.kubernetes.io/name: {name}
261    app.kubernetes.io/instance: {name}"#,
262            name = self.config.name,
263            namespace = self.config.namespace,
264            port = self.config.service_port
265        )
266    }
267
268    /// Generate the HorizontalPodAutoscaler manifest
269    pub fn generate_hpa(&self) -> String {
270        let gpu_resource = gpu_resource_key(&self.config.gpu_vendor);
271
272        format!(
273            r#"apiVersion: autoscaling/v2
274kind: HorizontalPodAutoscaler
275metadata:
276  name: {name}-hpa
277  namespace: {namespace}
278  labels:
279    app.kubernetes.io/name: {name}
280    app.kubernetes.io/component: autoscaler
281spec:
282  scaleTargetRef:
283    apiVersion: apps/v1
284    kind: Deployment
285    name: {name}
286  minReplicas: {min}
287  maxReplicas: {max}
288  metrics:
289    - type: Resource
290      resource:
291        name: cpu
292        target:
293          type: Utilization
294          averageUtilization: 80
295    - type: Pods
296      pods:
297        metric:
298          name: {gpu_resource}_utilization
299        target:
300          type: AverageValue
301          averageValue: "{target_util}"
302  behavior:
303    scaleUp:
304      stabilizationWindowSeconds: 60
305      policies:
306        - type: Pods
307          value: 2
308          periodSeconds: 60
309    scaleDown:
310      stabilizationWindowSeconds: 300
311      policies:
312        - type: Pods
313          value: 1
314          periodSeconds: 120"#,
315            name = self.config.name,
316            namespace = self.config.namespace,
317            min = self.config.hpa_min_replicas,
318            max = self.config.hpa_max_replicas,
319            gpu_resource = gpu_resource.replace('/', "_"),
320            target_util = self.config.hpa_target_gpu_utilization,
321        )
322    }
323
324    // --- Private helpers ---
325
326    /// Merge default labels with user-supplied labels
327    fn merge_labels(&self) -> HashMap<String, String> {
328        let mut labels = HashMap::new();
329        labels.insert(
330            "app.kubernetes.io/name".to_string(),
331            self.config.name.clone(),
332        );
333        labels.insert(
334            "app.kubernetes.io/instance".to_string(),
335            self.config.name.clone(),
336        );
337        labels.insert(
338            "app.kubernetes.io/component".to_string(),
339            "gpu-worker".to_string(),
340        );
341        labels.insert(
342            "app.kubernetes.io/part-of".to_string(),
343            "cuda-wasm".to_string(),
344        );
345        labels.insert(
346            "app.kubernetes.io/managed-by".to_string(),
347            "cuda-wasm-deployer".to_string(),
348        );
349
350        // Add GPU vendor label
351        let vendor_label = match &self.config.gpu_vendor {
352            GpuVendor::Nvidia => "nvidia",
353            GpuVendor::Amd => "amd",
354            GpuVendor::Intel => "intel",
355            GpuVendor::Unknown(v) => v.as_str(),
356        };
357        labels.insert("cuda-wasm/gpu-vendor".to_string(), vendor_label.to_string());
358
359        // Merge user labels
360        for (k, v) in &self.config.labels {
361            labels.insert(k.clone(), v.clone());
362        }
363
364        labels
365    }
366
367    /// Merge default annotations with user-supplied and NKE-specific annotations
368    fn merge_annotations(&self) -> HashMap<String, String> {
369        let mut annotations = HashMap::new();
370
371        // NKE-specific annotations
372        annotations.insert(
373            "nke.nutanix.com/gpu-enabled".to_string(),
374            "true".to_string(),
375        );
376        annotations.insert(
377            "nke.nutanix.com/cluster-type".to_string(),
378            "gpu-workload".to_string(),
379        );
380
381        // Merge user annotations
382        for (k, v) in &self.config.annotations {
383            annotations.insert(k.clone(), v.clone());
384        }
385
386        annotations
387    }
388
389    /// Generate node affinity rules for GPU vendor selection
390    fn generate_node_affinity(&self) -> String {
391        let vendor_label_value = match &self.config.gpu_vendor {
392            GpuVendor::Nvidia => "nvidia",
393            GpuVendor::Amd => "amd",
394            GpuVendor::Intel => "intel",
395            GpuVendor::Unknown(v) => v.as_str(),
396        };
397
398        format!(
399            r#"      affinity:
400        nodeAffinity:
401          requiredDuringSchedulingIgnoredDuringExecution:
402            nodeSelectorTerms:
403              - matchExpressions:
404                  - key: nvidia.com/gpu.present
405                    operator: In
406                    values:
407                      - "true"
408                  - key: feature.node.kubernetes.io/pci-{vendor}.present
409                    operator: In
410                    values:
411                      - "true"
412          preferredDuringSchedulingIgnoredDuringExecution:
413            - weight: 100
414              preference:
415                matchExpressions:
416                  - key: cuda-wasm/gpu-vendor
417                    operator: In
418                    values:
419                      - "{vendor}""#,
420            vendor = vendor_label_value,
421        )
422    }
423
424    /// Generate tolerations for GPU nodes
425    fn generate_tolerations(&self) -> String {
426        r#"      tolerations:
427        - key: nvidia.com/gpu
428          operator: Exists
429          effect: NoSchedule
430        - key: amd.com/gpu
431          operator: Exists
432          effect: NoSchedule
433        - key: "node-role.kubernetes.io/gpu"
434          operator: Exists
435          effect: NoSchedule"#
436            .to_string()
437    }
438}
439
440/// Get the Kubernetes GPU resource key for a given vendor
441pub fn gpu_resource_key(vendor: &GpuVendor) -> &'static str {
442    match vendor {
443        GpuVendor::Nvidia => "nvidia.com/gpu",
444        GpuVendor::Amd => "amd.com/gpu",
445        GpuVendor::Intel => "gpu.intel.com/i915",
446        GpuVendor::Unknown(_) => "nvidia.com/gpu", // default to NVIDIA
447    }
448}
449
/// Format a `HashMap` as indented YAML key-value pairs.
///
/// Each entry becomes one line of the form `<indent>key: "value"`. Lines are
/// sorted by key so the output is deterministic, and joined with `\n` (no
/// trailing newline). An empty map yields an empty string.
///
/// Values are written as double-quoted YAML scalars, so `\`, `"`, and the
/// control characters `\n`, `\r`, `\t` are escaped. Without this, a value
/// containing a quote would terminate the scalar early and corrupt the
/// document. Keys are emitted verbatim.
///
/// * `map` - entries to render
/// * `indent` - number of spaces placed before every key
fn format_yaml_map(map: &HashMap<String, String>, indent: usize) -> String {
    /// Escape `value` for use inside a double-quoted YAML scalar.
    fn escape_double_quoted(value: &str) -> String {
        let mut escaped = String::with_capacity(value.len());
        for ch in value.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                '\t' => escaped.push_str("\\t"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    let prefix = " ".repeat(indent);

    let mut pairs: Vec<(&String, &String)> = map.iter().collect();
    // Keys are unique, so an unstable sort gives the same result; comparing by
    // reference avoids cloning every key on each comparison.
    pairs.sort_unstable_by(|a, b| a.0.cmp(b.0));

    pairs
        .iter()
        .map(|(k, v)| format!("{}{}: \"{}\"", prefix, k, escape_double_quoted(v)))
        .collect::<Vec<_>>()
        .join("\n")
}
462
#[cfg(test)]
mod tests {
    use super::*;

    /// NVIDIA workload with two GPUs per pod and `with_hpa(1, 4, 75)` applied.
    fn sample_config() -> DeploymentConfig {
        DeploymentConfig::new("test-workload", "cuda-wasm:v1.0")
            .with_gpu_vendor(GpuVendor::Nvidia)
            .with_gpus(2)
            .with_hpa(1, 4, 75)
    }

    /// Generator built from [`sample_config`].
    fn sample_generator() -> DeploymentGenerator {
        DeploymentGenerator::new(sample_config())
    }

    /// Assert that every fragment occurs somewhere in `yaml`.
    fn assert_contains_all(yaml: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                yaml.contains(*fragment),
                "expected manifest to contain {:?}",
                fragment
            );
        }
    }

    #[test]
    fn test_generate_namespace() {
        let yaml = sample_generator().generate_namespace();
        assert_contains_all(&yaml, &["kind: Namespace", "name: cuda-wasm"]);
    }

    #[test]
    fn test_generate_configmap() {
        let yaml = sample_generator().generate_configmap();
        assert_contains_all(
            &yaml,
            &[
                "kind: ConfigMap",
                "CUDA_WASM_GPU_BACKEND: \"cuda\"",
                "CUDA_WASM_GPU_COUNT: \"2\"",
            ],
        );
    }

    #[test]
    fn test_generate_pvc() {
        let yaml = sample_generator().generate_pvc();
        assert_contains_all(
            &yaml,
            &[
                "kind: PersistentVolumeClaim",
                "storageClassName: nutanix-volume",
                "csi.nutanix.com/storage-type",
            ],
        );
    }

    #[test]
    fn test_generate_deployment_nvidia() {
        let yaml = sample_generator().generate_deployment();
        assert_contains_all(
            &yaml,
            &[
                "kind: Deployment",
                "nvidia.com/gpu: \"2\"",
                "image: cuda-wasm:v1.0",
                "nke.nutanix.com/gpu-enabled",
            ],
        );
    }

    #[test]
    fn test_generate_deployment_amd() {
        let amd_config = DeploymentConfig::new("amd-workload", "cuda-wasm:v1.0")
            .with_gpu_vendor(GpuVendor::Amd);
        let yaml = DeploymentGenerator::new(amd_config).generate_deployment();
        assert_contains_all(&yaml, &["amd.com/gpu: \"1\""]);
    }

    #[test]
    fn test_generate_service() {
        let yaml = sample_generator().generate_service();
        assert_contains_all(&yaml, &["kind: Service", "port: 8080"]);
    }

    #[test]
    fn test_generate_hpa() {
        let yaml = sample_generator().generate_hpa();
        assert_contains_all(
            &yaml,
            &[
                "kind: HorizontalPodAutoscaler",
                "minReplicas: 1",
                "maxReplicas: 4",
            ],
        );
    }

    #[test]
    fn test_generate_all() {
        let yaml = sample_generator().generate_all();
        // All sections should be present
        assert_contains_all(
            &yaml,
            &[
                "kind: Namespace",
                "kind: ConfigMap",
                "kind: PersistentVolumeClaim",
                "kind: Deployment",
                "kind: Service",
                "kind: HorizontalPodAutoscaler",
            ],
        );
        // Sections separated by ---
        let separators = yaml.matches("---").count();
        assert!(separators >= 5);
    }

    #[test]
    fn test_gpu_resource_key() {
        let expected = [
            (GpuVendor::Nvidia, "nvidia.com/gpu"),
            (GpuVendor::Amd, "amd.com/gpu"),
            (GpuVendor::Intel, "gpu.intel.com/i915"),
        ];
        for (vendor, key) in expected.iter() {
            assert_eq!(gpu_resource_key(vendor), *key);
        }
    }
}