1use crate::error::{ErrorData, Result};
11use crate::instance_catalog::Architecture;
12use crate::resource::{ResourceDefinition, ResourceOutputsDefinition, ResourceRef};
13use crate::ResourceType;
14use alien_error::AlienError;
15use bon::Builder;
16use serde::{Deserialize, Serialize};
17use std::any::Any;
18use std::fmt::Debug;
19
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
22#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
23#[serde(rename_all = "camelCase")]
24pub struct GpuSpec {
25 #[serde(rename = "type")]
27 pub gpu_type: String,
28 pub count: u32,
30}
31
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
38#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
39#[serde(rename_all = "camelCase")]
40pub struct MachineProfile {
41 pub cpu: String,
44 pub memory_bytes: u64,
46 pub ephemeral_storage_bytes: u64,
48 #[serde(skip_serializing_if = "Option::is_none")]
50 pub architecture: Option<Architecture>,
51 #[serde(skip_serializing_if = "Option::is_none")]
53 pub gpu: Option<GpuSpec>,
54}
55
56#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
58#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
59#[serde(rename_all = "camelCase")]
60pub struct ComputeChoiceRange {
61 pub min: u32,
63 pub max: u32,
65 pub default: u32,
67}
68
69impl ComputeChoiceRange {
70 pub fn contains(&self, value: u32) -> bool {
72 self.min <= value && value <= self.max
73 }
74}
75
76#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
78#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
79#[serde(rename_all = "camelCase", tag = "type")]
80pub enum CapacityGroupScalePolicy {
81 Fixed {
83 machines: ComputeChoiceRange,
85 },
86 Autoscale {
88 min: ComputeChoiceRange,
90 max: ComputeChoiceRange,
92 },
93}
94
95impl CapacityGroupScalePolicy {
96 pub fn from_selected_bounds(min_size: u32, max_size: u32) -> Self {
98 if min_size == max_size {
99 Self::Fixed {
100 machines: ComputeChoiceRange {
101 min: min_size,
102 max: max_size,
103 default: min_size,
104 },
105 }
106 } else {
107 Self::Autoscale {
108 min: ComputeChoiceRange {
109 min: min_size,
110 max: min_size,
111 default: min_size,
112 },
113 max: ComputeChoiceRange {
114 min: max_size,
115 max: max_size,
116 default: max_size,
117 },
118 }
119 }
120 }
121
122 pub fn default_min_size(&self) -> u32 {
124 match self {
125 Self::Fixed { machines } => machines.default,
126 Self::Autoscale { min, .. } => min.default,
127 }
128 }
129
130 pub fn default_max_size(&self) -> u32 {
132 match self {
133 Self::Fixed { machines } => machines.default,
134 Self::Autoscale { max, .. } => max.default,
135 }
136 }
137}
138
139#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
145#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
146#[serde(rename_all = "camelCase")]
147pub struct CapacityGroup {
148 pub group_id: String,
150 #[serde(skip_serializing_if = "Option::is_none")]
154 pub instance_type: Option<String>,
155 #[serde(skip_serializing_if = "Option::is_none")]
157 pub profile: Option<MachineProfile>,
158 pub min_size: u32,
160 pub max_size: u32,
162 #[serde(skip_serializing_if = "Option::is_none")]
167 pub scale_policy: Option<CapacityGroupScalePolicy>,
168 #[serde(skip_serializing_if = "Option::is_none")]
174 pub nested_virtualization: Option<bool>,
175}
176
177#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)]
214#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
215#[serde(rename_all = "camelCase", deny_unknown_fields)]
216#[builder(start_fn = new)]
217pub struct ComputeCluster {
218 #[builder(start_fn)]
221 pub id: String,
222
223 #[builder(field)]
226 pub capacity_groups: Vec<CapacityGroup>,
227
228 #[serde(skip_serializing_if = "Option::is_none")]
232 pub container_cidr: Option<String>,
233}
234
235impl ComputeCluster {
236 pub const RESOURCE_TYPE: ResourceType = ResourceType::from_static("compute-cluster");
238
239 pub fn id(&self) -> &str {
241 &self.id
242 }
243
244 pub fn container_cidr(&self) -> &str {
246 self.container_cidr.as_deref().unwrap_or("10.244.0.0/16")
247 }
248}
249
250impl<S: compute_cluster_builder::State> ComputeClusterBuilder<S> {
251 pub fn capacity_group(mut self, group: CapacityGroup) -> Self {
253 self.capacity_groups.push(group);
254 self
255 }
256}
257
258#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
260#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
261#[serde(rename_all = "camelCase")]
262pub struct CapacityGroupStatus {
263 pub group_id: String,
265 pub current_machines: u32,
267 pub desired_machines: u32,
269 pub instance_type: String,
271}
272
273#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
275#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
276#[serde(rename_all = "camelCase")]
277pub struct ComputeClusterOutputs {
278 pub cluster_id: String,
280 pub horizon_ready: bool,
282 pub capacity_group_statuses: Vec<CapacityGroupStatus>,
284 pub total_machines: u32,
286}
287
288impl ResourceOutputsDefinition for ComputeClusterOutputs {
289 fn get_resource_type(&self) -> ResourceType {
290 ComputeCluster::RESOURCE_TYPE.clone()
291 }
292
293 fn as_any(&self) -> &dyn Any {
294 self
295 }
296
297 fn box_clone(&self) -> Box<dyn ResourceOutputsDefinition> {
298 Box::new(self.clone())
299 }
300
301 fn outputs_eq(&self, other: &dyn ResourceOutputsDefinition) -> bool {
302 other.as_any().downcast_ref::<ComputeClusterOutputs>() == Some(self)
303 }
304
305 fn to_json_value(&self) -> serde_json::Result<serde_json::Value> {
306 serde_json::to_value(self)
307 }
308}
309
310impl ResourceDefinition for ComputeCluster {
311 fn get_resource_type(&self) -> ResourceType {
312 Self::RESOURCE_TYPE
313 }
314
315 fn id(&self) -> &str {
316 &self.id
317 }
318
319 fn get_dependencies(&self) -> Vec<ResourceRef> {
320 Vec::new()
326 }
327
328 fn validate_update(&self, new_config: &dyn ResourceDefinition) -> Result<()> {
329 let new_cluster = new_config
330 .as_any()
331 .downcast_ref::<ComputeCluster>()
332 .ok_or_else(|| {
333 AlienError::new(ErrorData::UnexpectedResourceType {
334 resource_id: self.id.clone(),
335 expected: Self::RESOURCE_TYPE,
336 actual: new_config.get_resource_type(),
337 })
338 })?;
339
340 if self.id != new_cluster.id {
341 return Err(AlienError::new(ErrorData::InvalidResourceUpdate {
342 resource_id: self.id.clone(),
343 reason: "the 'id' field is immutable".to_string(),
344 }));
345 }
346
347 if self.container_cidr.is_some()
349 && new_cluster.container_cidr.is_some()
350 && self.container_cidr != new_cluster.container_cidr
351 {
352 return Err(AlienError::new(ErrorData::InvalidResourceUpdate {
353 resource_id: self.id.clone(),
354 reason: "the 'containerCidr' field is immutable once set".to_string(),
355 }));
356 }
357
358 for new_group in &new_cluster.capacity_groups {
360 if let Some(existing_group) = self
361 .capacity_groups
362 .iter()
363 .find(|g| g.group_id == new_group.group_id)
364 {
365 if existing_group.instance_type.is_some()
367 && new_group.instance_type.is_some()
368 && existing_group.instance_type != new_group.instance_type
369 {
370 return Err(AlienError::new(ErrorData::InvalidResourceUpdate {
371 resource_id: self.id.clone(),
372 reason: format!(
373 "instance type for capacity group '{}' is immutable",
374 new_group.group_id
375 ),
376 }));
377 }
378 }
379 }
380
381 Ok(())
382 }
383
384 fn as_any(&self) -> &dyn Any {
385 self
386 }
387
388 fn as_any_mut(&mut self) -> &mut dyn Any {
389 self
390 }
391
392 fn box_clone(&self) -> Box<dyn ResourceDefinition> {
393 Box::new(self.clone())
394 }
395
396 fn resource_eq(&self, other: &dyn ResourceDefinition) -> bool {
397 other.as_any().downcast_ref::<ComputeCluster>() == Some(self)
398 }
399
400 fn to_json_value(&self) -> serde_json::Result<serde_json::Value> {
401 serde_json::to_value(self)
402 }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a capacity group with no profile, scale policy, or
    /// nested-virtualization setting.
    fn group(
        group_id: &str,
        instance_type: Option<&str>,
        min_size: u32,
        max_size: u32,
    ) -> CapacityGroup {
        CapacityGroup {
            group_id: group_id.to_string(),
            instance_type: instance_type.map(str::to_string),
            profile: None,
            min_size,
            max_size,
            scale_policy: None,
            nested_virtualization: None,
        }
    }

    /// Builds a cluster that contains exactly one capacity group and no explicit CIDR.
    fn single_group_cluster(id: &str, only_group: CapacityGroup) -> ComputeCluster {
        ComputeCluster::new(id.to_string())
            .capacity_group(only_group)
            .build()
    }

    #[test]
    fn test_compute_cluster_creation() {
        let cluster = single_group_cluster("compute", group("general", Some("m7g.xlarge"), 1, 5));

        assert_eq!(cluster.id(), "compute");
        assert_eq!(cluster.capacity_groups.len(), 1);
        assert_eq!(cluster.capacity_groups[0].group_id, "general");
        // No CIDR configured, so the accessor reports the default.
        assert_eq!(cluster.container_cidr(), "10.244.0.0/16");
    }

    #[test]
    fn test_compute_cluster_multiple_capacity_groups() {
        let gpu_group = CapacityGroup {
            profile: Some(MachineProfile {
                cpu: "4.0".to_string(),
                memory_bytes: 17179869184,
                ephemeral_storage_bytes: 214748364800,
                architecture: None,
                gpu: Some(GpuSpec {
                    gpu_type: "nvidia-a10g".to_string(),
                    count: 1,
                }),
            }),
            ..group("gpu", Some("g5.xlarge"), 0, 2)
        };

        let cluster = ComputeCluster::new("multi-pool".to_string())
            .capacity_group(group("general", Some("m7g.xlarge"), 1, 3))
            .capacity_group(gpu_group)
            .build();

        assert_eq!(cluster.capacity_groups.len(), 2);
        assert_eq!(cluster.capacity_groups[0].group_id, "general");
        assert_eq!(cluster.capacity_groups[1].group_id, "gpu");
        assert!(cluster.capacity_groups[1]
            .profile
            .as_ref()
            .unwrap()
            .gpu
            .is_some());
    }

    #[test]
    fn test_compute_cluster_custom_cidr() {
        let cluster = ComputeCluster::new("custom-net".to_string())
            .container_cidr("172.30.0.0/16".to_string())
            .capacity_group(group("general", None, 1, 5))
            .build();

        assert_eq!(cluster.container_cidr(), "172.30.0.0/16");
    }

    #[test]
    fn test_compute_cluster_validate_update_immutable_id() {
        let cluster1 = single_group_cluster("cluster-1", group("general", None, 1, 5));
        let cluster2 = single_group_cluster("cluster-2", group("general", None, 1, 5));

        let result = cluster1.validate_update(&cluster2);
        assert!(result.is_err());
    }

    #[test]
    fn test_compute_cluster_validate_update_scale_change() {
        let cluster1 = single_group_cluster("compute", group("general", Some("m7g.xlarge"), 1, 5));
        let cluster2 =
            single_group_cluster("compute", group("general", Some("m7g.xlarge"), 2, 10));

        // Changing only the size bounds of an existing group is allowed.
        let result = cluster1.validate_update(&cluster2);
        assert!(result.is_ok());
    }

    #[test]
    fn test_compute_cluster_validate_update_immutable_instance_type() {
        let cluster1 = single_group_cluster("compute", group("general", Some("m7g.xlarge"), 1, 5));
        let cluster2 =
            single_group_cluster("compute", group("general", Some("m7g.2xlarge"), 1, 5));

        // Same group id, both instance types set, values differ -> rejected.
        let result = cluster1.validate_update(&cluster2);
        assert!(result.is_err());
    }

    #[test]
    fn test_compute_cluster_validate_update_immutable_container_cidr() {
        let cluster1 = ComputeCluster::new("compute".to_string())
            .container_cidr("172.30.0.0/16".to_string())
            .capacity_group(group("general", None, 1, 5))
            .build();
        let cluster2 = ComputeCluster::new("compute".to_string())
            .container_cidr("172.31.0.0/16".to_string())
            .capacity_group(group("general", None, 1, 5))
            .build();

        // Both configs set a CIDR and the values differ -> rejected.
        let result = cluster1.validate_update(&cluster2);
        assert!(result.is_err());
    }

    #[test]
    fn test_compute_cluster_serialization() {
        let cluster =
            single_group_cluster("test-cluster", group("general", Some("m7g.xlarge"), 1, 5));

        let json = serde_json::to_string(&cluster).unwrap();
        let deserialized: ComputeCluster = serde_json::from_str(&json).unwrap();
        assert_eq!(cluster, deserialized);
    }

    #[test]
    fn test_scale_policy_from_selected_bounds() {
        let fixed = CapacityGroupScalePolicy::from_selected_bounds(3, 3);
        assert_eq!(
            fixed,
            CapacityGroupScalePolicy::Fixed {
                machines: ComputeChoiceRange {
                    min: 3,
                    max: 3,
                    default: 3,
                },
            }
        );
        assert_eq!(fixed.default_min_size(), 3);
        assert_eq!(fixed.default_max_size(), 3);

        let autoscale = CapacityGroupScalePolicy::from_selected_bounds(1, 5);
        assert_eq!(
            autoscale,
            CapacityGroupScalePolicy::Autoscale {
                min: ComputeChoiceRange {
                    min: 1,
                    max: 1,
                    default: 1,
                },
                max: ComputeChoiceRange {
                    min: 5,
                    max: 5,
                    default: 5,
                },
            }
        );
        assert_eq!(autoscale.default_min_size(), 1);
        assert_eq!(autoscale.default_max_size(), 5);
    }

    #[test]
    fn test_compute_choice_range_contains() {
        let range = ComputeChoiceRange {
            min: 1,
            max: 5,
            default: 2,
        };

        // Both bounds are inclusive.
        assert!(range.contains(1));
        assert!(range.contains(5));
        assert!(!range.contains(0));
        assert!(!range.contains(6));
    }
}