1use super::config::*;
14use std::collections::HashMap;
15
16pub struct DeploymentGenerator {
18 config: DeploymentConfig,
19}
20
21impl DeploymentGenerator {
22 pub fn new(config: DeploymentConfig) -> Self {
24 Self { config }
25 }
26
27 pub fn generate_all(&self) -> String {
37 let mut manifests = vec![
38 self.generate_namespace(),
39 self.generate_configmap(),
40 self.generate_pvc(),
41 self.generate_deployment(),
42 self.generate_service(),
43 ];
44
45 if self.config.enable_hpa {
46 manifests.push(self.generate_hpa());
47 }
48
49 manifests.join("\n---\n")
50 }
51
52 pub fn generate_namespace(&self) -> String {
54 format!(
55 r#"apiVersion: v1
56kind: Namespace
57metadata:
58 name: {namespace}
59 labels:
60 app.kubernetes.io/part-of: cuda-wasm
61 platform: nutanix-nke"#,
62 namespace = self.config.namespace
63 )
64 }
65
66 pub fn generate_configmap(&self) -> String {
68 let mut env_entries = String::new();
69 for (key, value) in &self.config.env_vars {
70 env_entries.push_str(&format!(" {}={}\n", key, value));
71 }
72
73 let gpu_backend = match &self.config.gpu_vendor {
74 GpuVendor::Nvidia => "cuda",
75 GpuVendor::Amd => "rocm",
76 GpuVendor::Intel => "oneapi",
77 GpuVendor::Unknown(_) => "webgpu",
78 };
79
80 format!(
81 r#"apiVersion: v1
82kind: ConfigMap
83metadata:
84 name: {name}-config
85 namespace: {namespace}
86 labels:
87 app.kubernetes.io/name: {name}
88 app.kubernetes.io/component: config
89data:
90 CUDA_WASM_GPU_BACKEND: "{gpu_backend}"
91 CUDA_WASM_GPU_COUNT: "{gpu_count}"
92 CUDA_WASM_KERNEL_CACHE_DIR: "/cache/kernels"
93 CUDA_WASM_LOG_LEVEL: "info"
94 CUDA_WASM_WEBGPU_ENABLED: "true"
95 CUDA_WASM_MEMORY_POOL_SIZE: "2147483648"
96 CUDA_WASM_MAX_CONCURRENT_KERNELS: "16"
97{env_entries}"#,
98 name = self.config.name,
99 namespace = self.config.namespace,
100 gpu_backend = gpu_backend,
101 gpu_count = self.config.gpus_per_pod,
102 env_entries = if env_entries.is_empty() {
103 String::new()
104 } else {
105 format!(" # Custom environment variables\n{}", env_entries)
106 }
107 )
108 }
109
110 pub fn generate_pvc(&self) -> String {
112 format!(
113 r#"apiVersion: v1
114kind: PersistentVolumeClaim
115metadata:
116 name: {name}-kernel-cache
117 namespace: {namespace}
118 labels:
119 app.kubernetes.io/name: {name}
120 app.kubernetes.io/component: cache
121 annotations:
122 # Nutanix CSI volume annotations
123 csi.nutanix.com/storage-type: "NutanixVolumes"
124spec:
125 accessModes:
126 - ReadWriteOnce
127 storageClassName: {storage_class}
128 resources:
129 requests:
130 storage: {cache_size}"#,
131 name = self.config.name,
132 namespace = self.config.namespace,
133 storage_class = self.config.storage_class,
134 cache_size = self.config.kernel_cache_size
135 )
136 }
137
138 pub fn generate_deployment(&self) -> String {
140 let gpu_resource = gpu_resource_key(&self.config.gpu_vendor);
141 let labels = self.merge_labels();
142 let annotations = self.merge_annotations();
143
144 let labels_yaml = format_yaml_map(&labels, 8);
145 let annotations_yaml = format_yaml_map(&annotations, 8);
146 let selector_labels = format!(
147 "app.kubernetes.io/name: {}\n app.kubernetes.io/instance: {}",
148 self.config.name, self.config.name
149 );
150 let pod_labels_yaml = format_yaml_map(&labels, 12);
151
152 let node_affinity = self.generate_node_affinity();
153 let tolerations = self.generate_tolerations();
154
155 format!(
156 r#"apiVersion: apps/v1
157kind: Deployment
158metadata:
159 name: {name}
160 namespace: {namespace}
161 labels:
162{labels_yaml}
163 annotations:
164{annotations_yaml}
165spec:
166 replicas: {replicas}
167 selector:
168 matchLabels:
169 {selector_labels}
170 template:
171 metadata:
172 labels:
173{pod_labels_yaml}
174 spec:
175{node_affinity}
176{tolerations}
177 containers:
178 - name: cuda-wasm-worker
179 image: {image}
180 ports:
181 - containerPort: {port}
182 name: http
183 protocol: TCP
184 envFrom:
185 - configMapRef:
186 name: {name}-config
187 resources:
188 requests:
189 cpu: "{cpu_request}"
190 memory: "{mem_request}"
191 {gpu_resource}: "{gpu_count}"
192 limits:
193 cpu: "{cpu_limit}"
194 memory: "{mem_limit}"
195 {gpu_resource}: "{gpu_count}"
196 volumeMounts:
197 - name: kernel-cache
198 mountPath: /cache/kernels
199 - name: dshm
200 mountPath: /dev/shm
201 livenessProbe:
202 httpGet:
203 path: /healthz
204 port: http
205 initialDelaySeconds: 30
206 periodSeconds: 10
207 readinessProbe:
208 httpGet:
209 path: /readyz
210 port: http
211 initialDelaySeconds: 10
212 periodSeconds: 5
213 volumes:
214 - name: kernel-cache
215 persistentVolumeClaim:
216 claimName: {name}-kernel-cache
217 - name: dshm
218 emptyDir:
219 medium: Memory
220 sizeLimit: 8Gi"#,
221 name = self.config.name,
222 namespace = self.config.namespace,
223 replicas = self.config.replicas,
224 image = self.config.image,
225 port = self.config.service_port,
226 cpu_request = self.config.cpu_request,
227 cpu_limit = self.config.cpu_limit,
228 mem_request = self.config.memory_request,
229 mem_limit = self.config.memory_limit,
230 gpu_resource = gpu_resource,
231 gpu_count = self.config.gpus_per_pod,
232 labels_yaml = labels_yaml,
233 annotations_yaml = annotations_yaml,
234 selector_labels = selector_labels,
235 pod_labels_yaml = pod_labels_yaml,
236 node_affinity = node_affinity,
237 tolerations = tolerations,
238 )
239 }
240
241 pub fn generate_service(&self) -> String {
243 format!(
244 r#"apiVersion: v1
245kind: Service
246metadata:
247 name: {name}
248 namespace: {namespace}
249 labels:
250 app.kubernetes.io/name: {name}
251 app.kubernetes.io/component: api
252spec:
253 type: ClusterIP
254 ports:
255 - port: {port}
256 targetPort: http
257 protocol: TCP
258 name: http
259 selector:
260 app.kubernetes.io/name: {name}
261 app.kubernetes.io/instance: {name}"#,
262 name = self.config.name,
263 namespace = self.config.namespace,
264 port = self.config.service_port
265 )
266 }
267
268 pub fn generate_hpa(&self) -> String {
270 let gpu_resource = gpu_resource_key(&self.config.gpu_vendor);
271
272 format!(
273 r#"apiVersion: autoscaling/v2
274kind: HorizontalPodAutoscaler
275metadata:
276 name: {name}-hpa
277 namespace: {namespace}
278 labels:
279 app.kubernetes.io/name: {name}
280 app.kubernetes.io/component: autoscaler
281spec:
282 scaleTargetRef:
283 apiVersion: apps/v1
284 kind: Deployment
285 name: {name}
286 minReplicas: {min}
287 maxReplicas: {max}
288 metrics:
289 - type: Resource
290 resource:
291 name: cpu
292 target:
293 type: Utilization
294 averageUtilization: 80
295 - type: Pods
296 pods:
297 metric:
298 name: {gpu_resource}_utilization
299 target:
300 type: AverageValue
301 averageValue: "{target_util}"
302 behavior:
303 scaleUp:
304 stabilizationWindowSeconds: 60
305 policies:
306 - type: Pods
307 value: 2
308 periodSeconds: 60
309 scaleDown:
310 stabilizationWindowSeconds: 300
311 policies:
312 - type: Pods
313 value: 1
314 periodSeconds: 120"#,
315 name = self.config.name,
316 namespace = self.config.namespace,
317 min = self.config.hpa_min_replicas,
318 max = self.config.hpa_max_replicas,
319 gpu_resource = gpu_resource.replace('/', "_"),
320 target_util = self.config.hpa_target_gpu_utilization,
321 )
322 }
323
324 fn merge_labels(&self) -> HashMap<String, String> {
328 let mut labels = HashMap::new();
329 labels.insert(
330 "app.kubernetes.io/name".to_string(),
331 self.config.name.clone(),
332 );
333 labels.insert(
334 "app.kubernetes.io/instance".to_string(),
335 self.config.name.clone(),
336 );
337 labels.insert(
338 "app.kubernetes.io/component".to_string(),
339 "gpu-worker".to_string(),
340 );
341 labels.insert(
342 "app.kubernetes.io/part-of".to_string(),
343 "cuda-wasm".to_string(),
344 );
345 labels.insert(
346 "app.kubernetes.io/managed-by".to_string(),
347 "cuda-wasm-deployer".to_string(),
348 );
349
350 let vendor_label = match &self.config.gpu_vendor {
352 GpuVendor::Nvidia => "nvidia",
353 GpuVendor::Amd => "amd",
354 GpuVendor::Intel => "intel",
355 GpuVendor::Unknown(v) => v.as_str(),
356 };
357 labels.insert("cuda-wasm/gpu-vendor".to_string(), vendor_label.to_string());
358
359 for (k, v) in &self.config.labels {
361 labels.insert(k.clone(), v.clone());
362 }
363
364 labels
365 }
366
367 fn merge_annotations(&self) -> HashMap<String, String> {
369 let mut annotations = HashMap::new();
370
371 annotations.insert(
373 "nke.nutanix.com/gpu-enabled".to_string(),
374 "true".to_string(),
375 );
376 annotations.insert(
377 "nke.nutanix.com/cluster-type".to_string(),
378 "gpu-workload".to_string(),
379 );
380
381 for (k, v) in &self.config.annotations {
383 annotations.insert(k.clone(), v.clone());
384 }
385
386 annotations
387 }
388
389 fn generate_node_affinity(&self) -> String {
391 let vendor_label_value = match &self.config.gpu_vendor {
392 GpuVendor::Nvidia => "nvidia",
393 GpuVendor::Amd => "amd",
394 GpuVendor::Intel => "intel",
395 GpuVendor::Unknown(v) => v.as_str(),
396 };
397
398 format!(
399 r#" affinity:
400 nodeAffinity:
401 requiredDuringSchedulingIgnoredDuringExecution:
402 nodeSelectorTerms:
403 - matchExpressions:
404 - key: nvidia.com/gpu.present
405 operator: In
406 values:
407 - "true"
408 - key: feature.node.kubernetes.io/pci-{vendor}.present
409 operator: In
410 values:
411 - "true"
412 preferredDuringSchedulingIgnoredDuringExecution:
413 - weight: 100
414 preference:
415 matchExpressions:
416 - key: cuda-wasm/gpu-vendor
417 operator: In
418 values:
419 - "{vendor}""#,
420 vendor = vendor_label_value,
421 )
422 }
423
424 fn generate_tolerations(&self) -> String {
426 r#" tolerations:
427 - key: nvidia.com/gpu
428 operator: Exists
429 effect: NoSchedule
430 - key: amd.com/gpu
431 operator: Exists
432 effect: NoSchedule
433 - key: "node-role.kubernetes.io/gpu"
434 operator: Exists
435 effect: NoSchedule"#
436 .to_string()
437 }
438}
439
440pub fn gpu_resource_key(vendor: &GpuVendor) -> &'static str {
442 match vendor {
443 GpuVendor::Nvidia => "nvidia.com/gpu",
444 GpuVendor::Amd => "amd.com/gpu",
445 GpuVendor::Intel => "gpu.intel.com/i915",
446 GpuVendor::Unknown(_) => "nvidia.com/gpu", }
448}
449
/// Renders `map` as YAML mapping entries, one `key: "value"` line per entry.
///
/// * Every line is prefixed with `indent` spaces.
/// * Entries are sorted by key so the output is stable between runs.
/// * Values are written as double-quoted scalars; `\`, `"` and newlines in a
///   value are escaped so they cannot end or corrupt the scalar.
/// * Lines are joined with `\n` and there is no trailing newline; an empty
///   map yields an empty string.
fn format_yaml_map(map: &HashMap<String, String>, indent: usize) -> String {
    /// Escapes the characters that are special inside a YAML double-quoted scalar.
    fn escape_double_quoted(value: &str) -> String {
        // Backslash must be handled first so later escapes are not doubled.
        value
            .replace('\\', "\\\\")
            .replace('"', "\\\"")
            .replace('\n', "\\n")
    }

    let prefix = " ".repeat(indent);

    let mut pairs: Vec<(&String, &String)> = map.iter().collect();
    // Compare borrowed keys directly; keys are unique, so stability is moot.
    pairs.sort_unstable_by(|a, b| a.0.cmp(b.0));

    pairs
        .into_iter()
        .map(|(key, value)| format!("{}{}: \"{}\"", prefix, key, escape_double_quoted(value)))
        .collect::<Vec<_>>()
        .join("\n")
}
462
#[cfg(test)]
mod tests {
    use super::*;

    /// NVIDIA workload with two GPUs per pod and the autoscaler enabled.
    fn test_config() -> DeploymentConfig {
        DeploymentConfig::new("test-workload", "cuda-wasm:v1.0")
            .with_gpu_vendor(GpuVendor::Nvidia)
            .with_gpus(2)
            .with_hpa(1, 4, 75)
    }

    /// Generator built from [`test_config`].
    fn nvidia_generator() -> DeploymentGenerator {
        DeploymentGenerator::new(test_config())
    }

    /// Asserts that every fragment occurs somewhere in `yaml`.
    fn assert_has(yaml: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                yaml.contains(*fragment),
                "expected {:?} in generated manifest:\n{}",
                fragment,
                yaml
            );
        }
    }

    #[test]
    fn test_generate_namespace() {
        let yaml = nvidia_generator().generate_namespace();
        assert_has(&yaml, &["kind: Namespace", "name: cuda-wasm"]);
    }

    #[test]
    fn test_generate_configmap() {
        let yaml = nvidia_generator().generate_configmap();
        assert_has(
            &yaml,
            &[
                "kind: ConfigMap",
                "CUDA_WASM_GPU_BACKEND: \"cuda\"",
                "CUDA_WASM_GPU_COUNT: \"2\"",
            ],
        );
    }

    #[test]
    fn test_generate_pvc() {
        let yaml = nvidia_generator().generate_pvc();
        assert_has(
            &yaml,
            &[
                "kind: PersistentVolumeClaim",
                "storageClassName: nutanix-volume",
                "csi.nutanix.com/storage-type",
            ],
        );
    }

    #[test]
    fn test_generate_deployment_nvidia() {
        let yaml = nvidia_generator().generate_deployment();
        assert_has(
            &yaml,
            &[
                "kind: Deployment",
                "nvidia.com/gpu: \"2\"",
                "image: cuda-wasm:v1.0",
                "nke.nutanix.com/gpu-enabled",
            ],
        );
    }

    #[test]
    fn test_generate_deployment_amd() {
        let amd_config = DeploymentConfig::new("amd-workload", "cuda-wasm:v1.0")
            .with_gpu_vendor(GpuVendor::Amd);
        let yaml = DeploymentGenerator::new(amd_config).generate_deployment();
        assert_has(&yaml, &["amd.com/gpu: \"1\""]);
    }

    #[test]
    fn test_generate_service() {
        let yaml = nvidia_generator().generate_service();
        assert_has(&yaml, &["kind: Service", "port: 8080"]);
    }

    #[test]
    fn test_generate_hpa() {
        let yaml = nvidia_generator().generate_hpa();
        assert_has(
            &yaml,
            &[
                "kind: HorizontalPodAutoscaler",
                "minReplicas: 1",
                "maxReplicas: 4",
            ],
        );
    }

    #[test]
    fn test_generate_all() {
        let yaml = nvidia_generator().generate_all();
        assert_has(
            &yaml,
            &[
                "kind: Namespace",
                "kind: ConfigMap",
                "kind: PersistentVolumeClaim",
                "kind: Deployment",
                "kind: Service",
                "kind: HorizontalPodAutoscaler",
            ],
        );
        // Six documents are joined by five separators.
        assert!(yaml.matches("---").count() >= 5);
    }

    #[test]
    fn test_gpu_resource_key() {
        let expected = [
            (GpuVendor::Nvidia, "nvidia.com/gpu"),
            (GpuVendor::Amd, "amd.com/gpu"),
            (GpuVendor::Intel, "gpu.intel.com/i915"),
        ];
        for (vendor, key) in expected.iter() {
            assert_eq!(gpu_resource_key(vendor), *key);
        }
    }
}