1use crate::{
8 instance_catalog::{self, WorkloadRequirements},
9 CapacityGroup, CapacityGroupScalePolicy, ComputeChoiceRange, ComputePoolSelection, Container,
10 Daemon, ErrorData, GpuSpec, MachineProfile, Platform, ResourceSpec, Stack,
11};
12use alien_error::{AlienError, Result};
13use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15
16#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
18#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
19#[serde(rename_all = "camelCase")]
20pub struct ComputePlan {
21 pub pools: Vec<ComputePoolPlan>,
23}
24
25#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
28#[serde(rename_all = "camelCase")]
29pub struct ComputePoolPlan {
30 pub pool_id: String,
32 pub workloads: Vec<String>,
34 pub requirements: MachineProfile,
36 pub scale: CapacityGroupScalePolicy,
38 pub selected: ComputePoolSelection,
40 pub recommended: ComputePoolSelection,
42 pub machines: Vec<ComputeMachineOption>,
44 #[serde(default, skip_serializing_if = "Vec::is_empty")]
46 pub errors: Vec<String>,
47}
48
49#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
51#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
52#[serde(rename_all = "camelCase")]
53pub struct ComputeMachineOption {
54 pub machine: String,
56 pub profile: MachineProfile,
58 pub recommended: bool,
60}
61
62pub fn plan_compute(
64 stack: &Stack,
65 platform: Platform,
66 selected_settings: Option<&crate::ComputeSettings>,
67) -> Result<ComputePlan, ErrorData> {
68 let mut groups = collect_workload_groups(stack)?;
69 merge_explicit_compute_groups(stack, &mut groups)?;
70
71 let mut pool_ids: Vec<String> = groups.keys().cloned().collect();
72 pool_ids.sort();
73
74 let mut pools = Vec::new();
75 for pool_id in pool_ids {
76 let group = groups.remove(&pool_id).expect("pool id came from map keys");
77 let requirements = group.requirements;
78 let selected = selected_settings.and_then(|settings| settings.pools.get(&pool_id));
79 let recommended = recommended_selection(platform, &requirements, &group.scale)?;
80 let selected_choice = selected.cloned().unwrap_or_else(|| recommended.clone());
81 let errors = validate_compute_pool_selection(
82 platform,
83 &pool_id,
84 &selected_choice,
85 &requirements,
86 &group.scale,
87 );
88 let machines = machine_options(platform, &requirements, selected_choice.machine())?;
89
90 pools.push(ComputePoolPlan {
91 pool_id,
92 workloads: group.workloads,
93 requirements: requirements_to_profile(&requirements),
94 scale: group.scale,
95 selected: selected_choice,
96 recommended,
97 machines,
98 errors,
99 });
100 }
101
102 Ok(ComputePlan { pools })
103}
104
105#[derive(Debug, Clone)]
106struct PlannedGroup {
107 workloads: Vec<String>,
108 requirements: WorkloadRequirements,
109 scale: CapacityGroupScalePolicy,
110}
111
112fn collect_workload_groups(stack: &Stack) -> Result<HashMap<String, PlannedGroup>, ErrorData> {
113 let mut groups: HashMap<String, Vec<Workload>> = HashMap::new();
114
115 for entry in stack.resources.values() {
116 if let Some(container) = entry.config.downcast_ref::<Container>() {
117 groups
118 .entry(
119 container
120 .pool
121 .clone()
122 .unwrap_or_else(|| needed_container_pool(container).to_string()),
123 )
124 .or_default()
125 .push(Workload::from_container(container)?);
126 }
127 if let Some(daemon) = entry.config.downcast_ref::<Daemon>() {
128 if daemon.cluster.is_some() {
129 groups
130 .entry(daemon.pool.clone().unwrap_or_else(|| "general".to_string()))
131 .or_default()
132 .push(Workload::from_daemon(daemon)?);
133 }
134 }
135 }
136
137 let mut planned = HashMap::new();
138 for (pool_id, workloads) in groups {
139 let requirements = aggregate_workloads(&workloads);
140 let min_size = default_min_machines(&requirements);
141 let max_size = default_max_machines(&requirements);
142 planned.insert(
143 pool_id,
144 PlannedGroup {
145 workloads: workloads.into_iter().map(|w| w.id).collect(),
146 scale: CapacityGroupScalePolicy::from_selected_bounds(min_size, max_size),
147 requirements,
148 },
149 );
150 }
151 if planned.is_empty() {
152 let requirements = default_requirements();
153 planned.insert(
154 "general".to_string(),
155 PlannedGroup {
156 workloads: Vec::new(),
157 scale: CapacityGroupScalePolicy::from_selected_bounds(1, 1),
158 requirements,
159 },
160 );
161 }
162 Ok(planned)
163}
164
165fn merge_explicit_compute_groups(
166 stack: &Stack,
167 groups: &mut HashMap<String, PlannedGroup>,
168) -> Result<(), ErrorData> {
169 for entry in stack.resources.values() {
170 let Some(cluster) = entry.config.downcast_ref::<crate::ComputeCluster>() else {
171 continue;
172 };
173 for group in &cluster.capacity_groups {
174 let explicit_requirements = profile_to_requirements(
175 group.profile.as_ref(),
176 group.nested_virtualization.unwrap_or(false),
177 );
178 let scale = group.scale_policy.clone().unwrap_or_else(|| {
179 CapacityGroupScalePolicy::from_selected_bounds(group.min_size, group.max_size)
180 });
181 groups
182 .entry(group.group_id.clone())
183 .and_modify(|planned| {
184 merge_requirements(&mut planned.requirements, &explicit_requirements);
185 planned.scale = merge_scale_policy(&planned.scale, &scale);
186 })
187 .or_insert_with(|| PlannedGroup {
188 workloads: Vec::new(),
189 scale,
190 requirements: explicit_requirements,
191 });
192 }
193 }
194 Ok(())
195}
196
197fn recommended_selection(
198 platform: Platform,
199 requirements: &WorkloadRequirements,
200 scale: &CapacityGroupScalePolicy,
201) -> Result<ComputePoolSelection, ErrorData> {
202 let machine = match platform {
203 Platform::Aws | Platform::Gcp | Platform::Azure => Some(
204 instance_catalog::select_instance_type(platform, requirements)
205 .map_err(|message| {
206 AlienError::new(ErrorData::GenericError {
207 message: format!("Failed to select {platform} machine: {message}"),
208 })
209 })?
210 .instance_type
211 .to_string(),
212 ),
213 Platform::Local | Platform::Kubernetes | Platform::Test => None,
214 };
215
216 match scale {
217 CapacityGroupScalePolicy::Fixed { machines } => Ok(ComputePoolSelection::Fixed {
218 machines: machines.default.max(1),
219 machine,
220 }),
221 CapacityGroupScalePolicy::Autoscale { min, max } => Ok(ComputePoolSelection::Autoscale {
222 min: min.default,
223 max: max.default.max(min.default),
224 machine,
225 }),
226 }
227}
228
229pub fn validate_compute_pool_selection(
232 platform: Platform,
233 pool_id: &str,
234 selection: &ComputePoolSelection,
235 requirements: &WorkloadRequirements,
236 scale: &CapacityGroupScalePolicy,
237) -> Vec<String> {
238 let mut errors = Vec::new();
239 if let Err(message) = selection.validate() {
240 errors.push(message);
241 }
242 if let Err(message) = validate_selection_against_scale(selection, scale) {
243 errors.push(format!("Pool '{pool_id}' {message}"));
244 }
245 if matches!(platform, Platform::Aws | Platform::Gcp | Platform::Azure) {
246 match selection.machine() {
247 Some(machine) => match instance_catalog::find_instance_type(platform, machine) {
248 Some(spec) => {
249 if !instance_satisfies(spec, requirements) {
250 errors.push(format!(
251 "{} machine '{}' does not satisfy pool '{}' requirements",
252 platform, machine, pool_id
253 ));
254 }
255 }
256 None => errors.push(format!(
257 "Unknown {} machine '{}' for pool '{}'",
258 platform, machine, pool_id
259 )),
260 },
261 None => errors.push(format!(
262 "Pool '{}' requires a provider machine on {}",
263 pool_id, platform
264 )),
265 }
266 }
267 errors
268}
269
270pub fn capacity_group_requirements(group: &CapacityGroup) -> WorkloadRequirements {
272 profile_to_requirements(
273 group.profile.as_ref(),
274 group.nested_virtualization.unwrap_or(false),
275 )
276}
277
278fn machine_options(
279 platform: Platform,
280 requirements: &WorkloadRequirements,
281 selected_machine: Option<&str>,
282) -> Result<Vec<ComputeMachineOption>, ErrorData> {
283 if !matches!(platform, Platform::Aws | Platform::Gcp | Platform::Azure) {
284 return Ok(Vec::new());
285 }
286 let recommended = instance_catalog::select_instance_type(platform, requirements)
287 .map_err(|message| {
288 AlienError::new(ErrorData::GenericError {
289 message: format!("Failed to select {platform} machine: {message}"),
290 })
291 })?
292 .instance_type
293 .to_string();
294
295 let mut options: Vec<ComputeMachineOption> = instance_catalog::catalog_for_platform(platform)
296 .into_iter()
297 .filter(|spec| instance_satisfies(spec, requirements))
298 .map(|spec| ComputeMachineOption {
299 machine: spec.name.to_string(),
300 profile: spec.to_machine_profile(),
301 recommended: spec.name == recommended || Some(spec.name) == selected_machine,
302 })
303 .collect();
304 options.sort_by(|a, b| a.machine.cmp(&b.machine));
305 Ok(options)
306}
307
308fn instance_satisfies(
309 spec: &instance_catalog::InstanceTypeSpec,
310 requirements: &WorkloadRequirements,
311) -> bool {
312 if let Some(architecture) = requirements.architecture {
313 if spec.architecture != architecture {
314 return false;
315 }
316 }
317 if requirements.nested_virt && !spec.is_nested_virt_capable() {
318 return false;
319 }
320 if spec.vcpu < requirements.max_cpu_per_container.ceil() as u32 {
321 return false;
322 }
323 if spec.memory_bytes < requirements.max_memory_per_container {
324 return false;
325 }
326 if spec.ephemeral_storage_bytes < requirements.max_ephemeral_storage_bytes {
327 return false;
328 }
329 match (&requirements.gpu, spec.gpu) {
330 (Some(required), Some(actual)) => {
331 (required.gpu_type == "any" || required.gpu_type == actual.gpu_type)
332 && actual.count >= required.count
333 }
334 (Some(_), None) => false,
335 (None, _) => true,
336 }
337}
338
339#[derive(Debug, Clone)]
340struct Workload {
341 id: String,
342 cpu: f64,
343 memory_bytes: u64,
344 desired_replicas: f64,
345 max_replicas: f64,
346 ephemeral_storage_bytes: u64,
347 gpu: Option<GpuSpec>,
348}
349
350impl Workload {
351 fn from_container(container: &Container) -> Result<Self, ErrorData> {
352 let cpu = parse_cpu(&container.id, &container.cpu)?;
353 let memory_bytes = parse_memory(&container.id, &container.memory)?;
354 let desired_replicas = container
355 .autoscaling
356 .as_ref()
357 .map(|a| a.desired)
358 .or(container.replicas)
359 .unwrap_or(1) as f64;
360 let max_replicas = container
361 .autoscaling
362 .as_ref()
363 .map(|a| a.max)
364 .or(container.replicas)
365 .unwrap_or(1) as f64;
366 let ephemeral_storage_bytes = container
367 .ephemeral_storage
368 .as_deref()
369 .map(instance_catalog::parse_memory_bytes)
370 .transpose()
371 .map_err(|message| {
372 AlienError::new(ErrorData::GenericError {
373 message: format!(
374 "Failed to parse ephemeral storage for '{}': {message}",
375 container.id
376 ),
377 })
378 })?
379 .unwrap_or(0);
380
381 Ok(Self {
382 id: container.id.clone(),
383 cpu,
384 memory_bytes,
385 desired_replicas,
386 max_replicas,
387 ephemeral_storage_bytes,
388 gpu: container.gpu.as_ref().map(|gpu| GpuSpec {
389 gpu_type: gpu.gpu_type.clone(),
390 count: gpu.count,
391 }),
392 })
393 }
394
395 fn from_daemon(daemon: &Daemon) -> Result<Self, ErrorData> {
396 Ok(Self {
397 id: daemon.id.clone(),
398 cpu: parse_cpu(&daemon.id, &daemon.cpu)?,
399 memory_bytes: parse_memory(&daemon.id, &daemon.memory)?,
400 desired_replicas: 1.0,
401 max_replicas: 1.0,
402 ephemeral_storage_bytes: 0,
403 gpu: None,
404 })
405 }
406}
407
408fn parse_cpu(resource_id: &str, spec: &ResourceSpec) -> Result<f64, ErrorData> {
409 instance_catalog::parse_cpu(&spec.desired).map_err(|message| {
410 AlienError::new(ErrorData::GenericError {
411 message: format!(
412 "Failed to parse CPU requirement '{}' for '{}': {message}",
413 spec.desired, resource_id
414 ),
415 })
416 })
417}
418
419fn parse_memory(resource_id: &str, spec: &ResourceSpec) -> Result<u64, ErrorData> {
420 instance_catalog::parse_memory_bytes(&spec.desired).map_err(|message| {
421 AlienError::new(ErrorData::GenericError {
422 message: format!(
423 "Failed to parse memory requirement '{}' for '{}': {message}",
424 spec.desired, resource_id
425 ),
426 })
427 })
428}
429
430fn aggregate_workloads(workloads: &[Workload]) -> WorkloadRequirements {
431 let mut requirements = default_requirements();
432 requirements.total_cpu_at_desired = 0.0;
433 requirements.total_memory_bytes_at_desired = 0;
434 requirements.total_cpu_at_max = 0.0;
435 requirements.total_memory_bytes_at_max = 0;
436 requirements.max_cpu_per_container = 0.0;
437 requirements.max_memory_per_container = 0;
438 requirements.max_ephemeral_storage_bytes = 0;
439 requirements.gpu = None;
440
441 for workload in workloads {
442 requirements.total_cpu_at_desired += workload.cpu * workload.desired_replicas;
443 requirements.total_cpu_at_max += workload.cpu * workload.max_replicas;
444 requirements.total_memory_bytes_at_desired +=
445 (workload.memory_bytes as f64 * workload.desired_replicas) as u64;
446 requirements.total_memory_bytes_at_max +=
447 (workload.memory_bytes as f64 * workload.max_replicas) as u64;
448 requirements.max_cpu_per_container = requirements.max_cpu_per_container.max(workload.cpu);
449 requirements.max_memory_per_container = requirements
450 .max_memory_per_container
451 .max(workload.memory_bytes);
452 requirements.max_ephemeral_storage_bytes = requirements
453 .max_ephemeral_storage_bytes
454 .max(workload.ephemeral_storage_bytes);
455 if requirements.gpu.is_none() {
456 requirements.gpu = workload.gpu.clone();
457 }
458 }
459 requirements
460}
461
462fn default_requirements() -> WorkloadRequirements {
463 WorkloadRequirements {
464 total_cpu_at_desired: 1.0,
465 total_memory_bytes_at_desired: 2 * 1024 * 1024 * 1024,
466 total_cpu_at_max: 1.0,
467 total_memory_bytes_at_max: 2 * 1024 * 1024 * 1024,
468 max_cpu_per_container: 1.0,
469 max_memory_per_container: 2 * 1024 * 1024 * 1024,
470 max_ephemeral_storage_bytes: 0,
471 gpu: None,
472 architecture: None,
473 nested_virt: false,
474 }
475}
476
477fn profile_to_requirements(
478 profile: Option<&MachineProfile>,
479 nested_virt: bool,
480) -> WorkloadRequirements {
481 let Some(profile) = profile else {
482 return WorkloadRequirements {
483 nested_virt,
484 ..default_requirements()
485 };
486 };
487 let cpu = instance_catalog::parse_cpu(&profile.cpu).unwrap_or(1.0);
488 WorkloadRequirements {
489 total_cpu_at_desired: cpu,
490 total_memory_bytes_at_desired: profile.memory_bytes,
491 total_cpu_at_max: cpu,
492 total_memory_bytes_at_max: profile.memory_bytes,
493 max_cpu_per_container: cpu,
494 max_memory_per_container: profile.memory_bytes,
495 max_ephemeral_storage_bytes: profile.ephemeral_storage_bytes,
496 gpu: profile.gpu.clone(),
497 architecture: profile.architecture,
498 nested_virt,
499 }
500}
501
502fn merge_requirements(existing: &mut WorkloadRequirements, declared: &WorkloadRequirements) {
503 existing.total_cpu_at_desired = existing
504 .total_cpu_at_desired
505 .max(declared.total_cpu_at_desired);
506 existing.total_memory_bytes_at_desired = existing
507 .total_memory_bytes_at_desired
508 .max(declared.total_memory_bytes_at_desired);
509 existing.total_cpu_at_max = existing.total_cpu_at_max.max(declared.total_cpu_at_max);
510 existing.total_memory_bytes_at_max = existing
511 .total_memory_bytes_at_max
512 .max(declared.total_memory_bytes_at_max);
513 existing.max_cpu_per_container = existing
514 .max_cpu_per_container
515 .max(declared.max_cpu_per_container);
516 existing.max_memory_per_container = existing
517 .max_memory_per_container
518 .max(declared.max_memory_per_container);
519 existing.max_ephemeral_storage_bytes = existing
520 .max_ephemeral_storage_bytes
521 .max(declared.max_ephemeral_storage_bytes);
522 if existing.gpu.is_none() {
523 existing.gpu = declared.gpu.clone();
524 }
525 existing.nested_virt |= declared.nested_virt;
526 if existing.architecture.is_none() {
527 existing.architecture = declared.architecture;
528 }
529}
530
531fn requirements_to_profile(requirements: &WorkloadRequirements) -> MachineProfile {
532 MachineProfile {
533 cpu: requirements.max_cpu_per_container.to_string(),
534 memory_bytes: requirements.max_memory_per_container,
535 ephemeral_storage_bytes: requirements.max_ephemeral_storage_bytes,
536 architecture: requirements.architecture,
537 gpu: requirements.gpu.clone(),
538 }
539}
540
541fn merge_scale_policy(
542 existing: &CapacityGroupScalePolicy,
543 declared: &CapacityGroupScalePolicy,
544) -> CapacityGroupScalePolicy {
545 match (existing, declared) {
546 (
547 CapacityGroupScalePolicy::Fixed {
548 machines: existing_machines,
549 },
550 CapacityGroupScalePolicy::Fixed {
551 machines: declared_machines,
552 },
553 ) => CapacityGroupScalePolicy::Fixed {
554 machines: merge_choice_range(existing_machines, declared_machines),
555 },
556 (_, declared) => declared.clone(),
557 }
558}
559
560fn merge_choice_range(
561 existing: &ComputeChoiceRange,
562 declared: &ComputeChoiceRange,
563) -> ComputeChoiceRange {
564 ComputeChoiceRange {
565 min: existing.min.max(declared.min),
566 max: existing.max.max(declared.max),
567 default: declared.default,
568 }
569}
570
571fn validate_selection_against_scale(
572 selection: &ComputePoolSelection,
573 scale: &CapacityGroupScalePolicy,
574) -> std::result::Result<(), String> {
575 match (selection, scale) {
576 (
577 ComputePoolSelection::Fixed { machines, .. },
578 CapacityGroupScalePolicy::Fixed { machines: allowed },
579 ) => {
580 if allowed.contains(*machines) {
581 Ok(())
582 } else {
583 Err(format!(
584 "fixed machine count {machines} is outside the allowed range {}-{}",
585 allowed.min, allowed.max
586 ))
587 }
588 }
589 (
590 ComputePoolSelection::Autoscale { min, max, .. },
591 CapacityGroupScalePolicy::Autoscale {
592 min: allowed_min,
593 max: allowed_max,
594 },
595 ) => {
596 if !allowed_min.contains(*min) {
597 return Err(format!(
598 "autoscale minimum {min} is outside the allowed range {}-{}",
599 allowed_min.min, allowed_min.max
600 ));
601 }
602 if !allowed_max.contains(*max) {
603 return Err(format!(
604 "autoscale maximum {max} is outside the allowed range {}-{}",
605 allowed_max.min, allowed_max.max
606 ));
607 }
608 Ok(())
609 }
610 (ComputePoolSelection::Fixed { .. }, CapacityGroupScalePolicy::Autoscale { .. }) => {
611 Err("must use autoscale mode".to_string())
612 }
613 (ComputePoolSelection::Autoscale { .. }, CapacityGroupScalePolicy::Fixed { .. }) => {
614 Err("must use fixed mode".to_string())
615 }
616 }
617}
618
619fn needed_container_pool(container: &Container) -> &'static str {
620 if container.gpu.is_some() {
621 return "gpu";
622 }
623 if let Some(storage) = &container.ephemeral_storage {
624 if instance_catalog::parse_memory_bytes(storage).unwrap_or(0) > 200 * 1024 * 1024 * 1024 {
625 return "storage";
626 }
627 }
628 "general"
629}
630
631fn default_min_machines(requirements: &WorkloadRequirements) -> u32 {
632 if requirements.total_cpu_at_desired > 0.0 || requirements.total_memory_bytes_at_desired > 0 {
633 1
634 } else {
635 0
636 }
637}
638
639fn default_max_machines(requirements: &WorkloadRequirements) -> u32 {
640 let min = default_min_machines(requirements);
641 let by_cpu =
642 (requirements.total_cpu_at_max / requirements.max_cpu_per_container.max(1.0)).ceil() as u32;
643 let by_mem = requirements
644 .total_memory_bytes_at_max
645 .div_ceil(requirements.max_memory_per_container.max(1)) as u32;
646 min.max(by_cpu).max(by_mem).max(1)
647}
648
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        instance_catalog::Architecture, CapacityGroup, CapacityGroupScalePolicy,
        ComputeChoiceRange, ComputeCluster, ComputeSettings, ContainerCode, DaemonCode, Resource,
        ResourceEntry, ResourceLifecycle, Stack,
    };

    const GIB: u64 = 1024 * 1024 * 1024;

    /// Builds a resource spec from string slices.
    fn resource_spec(min: &str, desired: &str) -> ResourceSpec {
        ResourceSpec {
            min: min.to_string(),
            desired: desired.to_string(),
        }
    }

    /// Wraps a resource in an entry with no dependencies and no remote access.
    fn entry(config: Resource, lifecycle: ResourceLifecycle) -> ResourceEntry {
        ResourceEntry {
            config,
            lifecycle,
            dependencies: Vec::new(),
            remote_access: false,
        }
    }

    /// Builds a stack with default permissions from `(id, entry)` pairs.
    fn stack_of(id: &str, resources: Vec<(String, ResourceEntry)>) -> Stack {
        Stack {
            id: id.to_string(),
            resources: resources.into_iter().collect(),
            permissions: crate::permissions::PermissionsConfig::default(),
            supported_platforms: None,
            inputs: vec![],
        }
    }

    /// Capacity group "general" with a 4 CPU / 16 GiB / 20 GiB profile and nested
    /// virtualization enabled.
    fn general_group(
        architecture: Option<Architecture>,
        min_size: u32,
        max_size: u32,
        scale_policy: Option<CapacityGroupScalePolicy>,
    ) -> CapacityGroup {
        CapacityGroup {
            group_id: "general".to_string(),
            instance_type: None,
            profile: Some(MachineProfile {
                cpu: "4".to_string(),
                memory_bytes: 16 * GIB,
                ephemeral_storage_bytes: 20 * GIB,
                architecture,
                gpu: None,
            }),
            min_size,
            max_size,
            scale_policy,
            nested_virtualization: Some(true),
        }
    }

    /// Settings that pin the "general" pool to `selection`.
    fn general_settings(selection: ComputePoolSelection) -> ComputeSettings {
        ComputeSettings {
            pools: [("general".to_string(), selection)].into_iter().collect(),
        }
    }

    /// First pool of the plan, which every test here expects to be "general".
    fn first_pool(plan: &ComputePlan) -> &ComputePoolPlan {
        plan.pools.first().expect("general pool should exist")
    }

    fn stack_with_container() -> Stack {
        let container = Container::new("api".to_string())
            .code(ContainerCode::Image {
                image: "api:latest".to_string(),
            })
            .cpu(resource_spec("1", "2"))
            .memory(resource_spec("2Gi", "4Gi"))
            .permissions("api".to_string())
            .build();

        stack_of(
            "test",
            vec![(
                "api".to_string(),
                entry(Resource::new(container), ResourceLifecycle::Live),
            )],
        )
    }

    #[test]
    fn cloud_plan_recommends_provider_machine_without_mutating_selection() {
        let stack = stack_with_container();

        let plan = plan_compute(&stack, Platform::Aws, None).expect("plan should build");

        let pool = first_pool(&plan);
        assert_eq!(pool.pool_id, "general");
        assert_eq!(pool.workloads, vec!["api"]);
        assert!(pool.selected.machine().is_some());
        assert!(pool.machines.iter().any(|machine| machine.recommended));
    }

    #[test]
    fn selected_machine_is_preserved_as_static_deployment_choice() {
        let stack = stack_with_container();
        let settings = general_settings(ComputePoolSelection::Fixed {
            machines: 1,
            machine: Some("m7g.xlarge".to_string()),
        });

        let plan = plan_compute(&stack, Platform::Aws, Some(&settings)).expect("plan should build");

        let pool = first_pool(&plan);
        assert_eq!(pool.selected.machine(), Some("m7g.xlarge"));
        assert!(pool.errors.is_empty());
    }

    #[test]
    fn explicit_capacity_group_requirements_are_merged_with_workloads() {
        let mut stack = stack_with_container();
        let cluster = ComputeCluster::new("compute".to_string())
            .capacity_group(general_group(None, 2, 5, None))
            .build();
        stack.resources.insert(
            "compute".to_string(),
            entry(Resource::new(cluster), ResourceLifecycle::Frozen),
        );

        let plan = plan_compute(&stack, Platform::Aws, None).expect("plan should build");

        let pool = first_pool(&plan);
        let machine = pool
            .selected
            .machine()
            .expect("AWS selection should include a machine");
        let spec = instance_catalog::find_instance_type(Platform::Aws, machine)
            .expect("selected machine should exist in the catalog");
        assert!(spec.is_nested_virt_capable());
        assert_eq!(pool.selected.min_size(), 2);
        assert_eq!(pool.selected.max_size(), 5);
        assert!(pool.errors.is_empty());
    }

    #[test]
    fn nested_x86_fixed_range_pool_preserves_bounds_and_rejects_graviton() {
        let fixed_one_to_five = CapacityGroupScalePolicy::Fixed {
            machines: ComputeChoiceRange {
                min: 1,
                max: 5,
                default: 2,
            },
        };

        let daemon = Daemon::new("bear-agent-loader".to_string())
            .code(DaemonCode::Image {
                image: "example.com/bear:latest".to_string(),
            })
            .cluster("bear-runtime".to_string())
            .cpu(resource_spec("2", "2"))
            .memory(resource_spec("4Gi", "4Gi"))
            .permissions("loader".to_string())
            .build();
        let cluster = ComputeCluster::new("bear-runtime".to_string())
            .capacity_group(general_group(
                Some(Architecture::X86_64),
                2,
                2,
                Some(fixed_one_to_five.clone()),
            ))
            .build();
        let stack = stack_of(
            "bear",
            vec![
                (
                    "bear-agent-loader".to_string(),
                    entry(Resource::new(daemon), ResourceLifecycle::Live),
                ),
                (
                    "bear-runtime".to_string(),
                    entry(Resource::new(cluster), ResourceLifecycle::Frozen),
                ),
            ],
        );

        let plan = plan_compute(&stack, Platform::Aws, None).expect("plan should build");
        let pool = first_pool(&plan);
        assert_eq!(pool.recommended.machine(), Some("m8i.2xlarge"));
        assert_eq!(pool.recommended.min_size(), 2);
        assert_eq!(pool.recommended.max_size(), 2);
        assert_eq!(pool.scale, fixed_one_to_five);
        assert!(!pool
            .machines
            .iter()
            .any(|option| option.machine == "m7g.2xlarge"));

        // Selecting the excluded machine explicitly must surface a validation error.
        let invalid_settings = general_settings(ComputePoolSelection::Fixed {
            machines: 2,
            machine: Some("m7g.2xlarge".to_string()),
        });
        let invalid_plan = plan_compute(&stack, Platform::Aws, Some(&invalid_settings))
            .expect("plan should build");
        assert!(!invalid_plan.pools[0].errors.is_empty());
    }

    #[test]
    fn local_plan_has_no_provider_machine_choices() {
        let stack = stack_with_container();

        let plan = plan_compute(&stack, Platform::Local, None).expect("plan should build");

        let pool = first_pool(&plan);
        assert_eq!(pool.selected.machine(), None);
        assert!(pool.machines.is_empty());
        assert!(pool.errors.is_empty());
    }
}