alien_core/
compute_planner.rs

1//! Deployment-time compute planner.
2//!
3//! The planner turns portable stack requirements plus a target platform into a
4//! renderable set of recommended deployment choices. It does not mutate the
5//! stack and does not require database access.
6
7use crate::{
8    instance_catalog::{self, WorkloadRequirements},
9    CapacityGroup, CapacityGroupScalePolicy, ComputeChoiceRange, ComputePoolSelection, Container,
10    Daemon, ErrorData, GpuSpec, MachineProfile, Platform, ResourceSpec, Stack,
11};
12use alien_error::{AlienError, Result};
13use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15
16/// Full compute plan for one stack/platform pair.
17#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
18#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
19#[serde(rename_all = "camelCase")]
20pub struct ComputePlan {
21    /// Planned pools in stable pool-id order.
22    pub pools: Vec<ComputePoolPlan>,
23}
24
25/// Planner output for one compute pool.
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
28#[serde(rename_all = "camelCase")]
29pub struct ComputePoolPlan {
30    /// Pool ID from the stack or derived default.
31    pub pool_id: String,
32    /// Workloads assigned to this pool.
33    pub workloads: Vec<String>,
34    /// Aggregated requirements used for machine selection.
35    pub requirements: MachineProfile,
36    /// Allowed scale policy declared by source or derived for generated pools.
37    pub scale: CapacityGroupScalePolicy,
38    /// Recommended or user-selected deployment choice.
39    pub selected: ComputePoolSelection,
40    /// Planner-recommended default.
41    pub recommended: ComputePoolSelection,
42    /// Valid cloud machine choices. Empty for local and Kubernetes.
43    pub machines: Vec<ComputeMachineOption>,
44    /// Validation errors for supplied deployment settings.
45    #[serde(default, skip_serializing_if = "Vec::is_empty")]
46    pub errors: Vec<String>,
47}
48
49/// One concrete provider machine option.
50#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
51#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
52#[serde(rename_all = "camelCase")]
53pub struct ComputeMachineOption {
54    /// Provider machine name.
55    pub machine: String,
56    /// Machine hardware profile.
57    pub profile: MachineProfile,
58    /// Whether this machine is the planner's default recommendation.
59    pub recommended: bool,
60}
61
62/// Compute a deterministic deployment-time plan.
63pub fn plan_compute(
64    stack: &Stack,
65    platform: Platform,
66    selected_settings: Option<&crate::ComputeSettings>,
67) -> Result<ComputePlan, ErrorData> {
68    let mut groups = collect_workload_groups(stack)?;
69    merge_explicit_compute_groups(stack, &mut groups)?;
70
71    let mut pool_ids: Vec<String> = groups.keys().cloned().collect();
72    pool_ids.sort();
73
74    let mut pools = Vec::new();
75    for pool_id in pool_ids {
76        let group = groups.remove(&pool_id).expect("pool id came from map keys");
77        let requirements = group.requirements;
78        let selected = selected_settings.and_then(|settings| settings.pools.get(&pool_id));
79        let recommended = recommended_selection(platform, &requirements, &group.scale)?;
80        let selected_choice = selected.cloned().unwrap_or_else(|| recommended.clone());
81        let errors = validate_compute_pool_selection(
82            platform,
83            &pool_id,
84            &selected_choice,
85            &requirements,
86            &group.scale,
87        );
88        let machines = machine_options(platform, &requirements, selected_choice.machine())?;
89
90        pools.push(ComputePoolPlan {
91            pool_id,
92            workloads: group.workloads,
93            requirements: requirements_to_profile(&requirements),
94            scale: group.scale,
95            selected: selected_choice,
96            recommended,
97            machines,
98            errors,
99        });
100    }
101
102    Ok(ComputePlan { pools })
103}
104
105#[derive(Debug, Clone)]
106struct PlannedGroup {
107    workloads: Vec<String>,
108    requirements: WorkloadRequirements,
109    scale: CapacityGroupScalePolicy,
110}
111
112fn collect_workload_groups(stack: &Stack) -> Result<HashMap<String, PlannedGroup>, ErrorData> {
113    let mut groups: HashMap<String, Vec<Workload>> = HashMap::new();
114
115    for entry in stack.resources.values() {
116        if let Some(container) = entry.config.downcast_ref::<Container>() {
117            groups
118                .entry(
119                    container
120                        .pool
121                        .clone()
122                        .unwrap_or_else(|| needed_container_pool(container).to_string()),
123                )
124                .or_default()
125                .push(Workload::from_container(container)?);
126        }
127        if let Some(daemon) = entry.config.downcast_ref::<Daemon>() {
128            if daemon.cluster.is_some() {
129                groups
130                    .entry(daemon.pool.clone().unwrap_or_else(|| "general".to_string()))
131                    .or_default()
132                    .push(Workload::from_daemon(daemon)?);
133            }
134        }
135    }
136
137    let mut planned = HashMap::new();
138    for (pool_id, workloads) in groups {
139        let requirements = aggregate_workloads(&workloads);
140        let min_size = default_min_machines(&requirements);
141        let max_size = default_max_machines(&requirements);
142        planned.insert(
143            pool_id,
144            PlannedGroup {
145                workloads: workloads.into_iter().map(|w| w.id).collect(),
146                scale: CapacityGroupScalePolicy::from_selected_bounds(min_size, max_size),
147                requirements,
148            },
149        );
150    }
151    if planned.is_empty() {
152        let requirements = default_requirements();
153        planned.insert(
154            "general".to_string(),
155            PlannedGroup {
156                workloads: Vec::new(),
157                scale: CapacityGroupScalePolicy::from_selected_bounds(1, 1),
158                requirements,
159            },
160        );
161    }
162    Ok(planned)
163}
164
165fn merge_explicit_compute_groups(
166    stack: &Stack,
167    groups: &mut HashMap<String, PlannedGroup>,
168) -> Result<(), ErrorData> {
169    for entry in stack.resources.values() {
170        let Some(cluster) = entry.config.downcast_ref::<crate::ComputeCluster>() else {
171            continue;
172        };
173        for group in &cluster.capacity_groups {
174            let explicit_requirements = profile_to_requirements(
175                group.profile.as_ref(),
176                group.nested_virtualization.unwrap_or(false),
177            );
178            let scale = group.scale_policy.clone().unwrap_or_else(|| {
179                CapacityGroupScalePolicy::from_selected_bounds(group.min_size, group.max_size)
180            });
181            groups
182                .entry(group.group_id.clone())
183                .and_modify(|planned| {
184                    merge_requirements(&mut planned.requirements, &explicit_requirements);
185                    planned.scale = merge_scale_policy(&planned.scale, &scale);
186                })
187                .or_insert_with(|| PlannedGroup {
188                    workloads: Vec::new(),
189                    scale,
190                    requirements: explicit_requirements,
191                });
192        }
193    }
194    Ok(())
195}
196
197fn recommended_selection(
198    platform: Platform,
199    requirements: &WorkloadRequirements,
200    scale: &CapacityGroupScalePolicy,
201) -> Result<ComputePoolSelection, ErrorData> {
202    let machine = match platform {
203        Platform::Aws | Platform::Gcp | Platform::Azure => Some(
204            instance_catalog::select_instance_type(platform, requirements)
205                .map_err(|message| {
206                    AlienError::new(ErrorData::GenericError {
207                        message: format!("Failed to select {platform} machine: {message}"),
208                    })
209                })?
210                .instance_type
211                .to_string(),
212        ),
213        Platform::Local | Platform::Kubernetes | Platform::Test => None,
214    };
215
216    match scale {
217        CapacityGroupScalePolicy::Fixed { machines } => Ok(ComputePoolSelection::Fixed {
218            machines: machines.default.max(1),
219            machine,
220        }),
221        CapacityGroupScalePolicy::Autoscale { min, max } => Ok(ComputePoolSelection::Autoscale {
222            min: min.default,
223            max: max.default.max(min.default),
224            machine,
225        }),
226    }
227}
228
229/// Validate one selected compute pool against platform machine requirements and
230/// source-declared scale bounds.
231pub fn validate_compute_pool_selection(
232    platform: Platform,
233    pool_id: &str,
234    selection: &ComputePoolSelection,
235    requirements: &WorkloadRequirements,
236    scale: &CapacityGroupScalePolicy,
237) -> Vec<String> {
238    let mut errors = Vec::new();
239    if let Err(message) = selection.validate() {
240        errors.push(message);
241    }
242    if let Err(message) = validate_selection_against_scale(selection, scale) {
243        errors.push(format!("Pool '{pool_id}' {message}"));
244    }
245    if matches!(platform, Platform::Aws | Platform::Gcp | Platform::Azure) {
246        match selection.machine() {
247            Some(machine) => match instance_catalog::find_instance_type(platform, machine) {
248                Some(spec) => {
249                    if !instance_satisfies(spec, requirements) {
250                        errors.push(format!(
251                            "{} machine '{}' does not satisfy pool '{}' requirements",
252                            platform, machine, pool_id
253                        ));
254                    }
255                }
256                None => errors.push(format!(
257                    "Unknown {} machine '{}' for pool '{}'",
258                    platform, machine, pool_id
259                )),
260            },
261            None => errors.push(format!(
262                "Pool '{}' requires a provider machine on {}",
263                pool_id, platform
264            )),
265        }
266    }
267    errors
268}
269
270/// Convert a capacity group declaration into planner requirements.
271pub fn capacity_group_requirements(group: &CapacityGroup) -> WorkloadRequirements {
272    profile_to_requirements(
273        group.profile.as_ref(),
274        group.nested_virtualization.unwrap_or(false),
275    )
276}
277
278fn machine_options(
279    platform: Platform,
280    requirements: &WorkloadRequirements,
281    selected_machine: Option<&str>,
282) -> Result<Vec<ComputeMachineOption>, ErrorData> {
283    if !matches!(platform, Platform::Aws | Platform::Gcp | Platform::Azure) {
284        return Ok(Vec::new());
285    }
286    let recommended = instance_catalog::select_instance_type(platform, requirements)
287        .map_err(|message| {
288            AlienError::new(ErrorData::GenericError {
289                message: format!("Failed to select {platform} machine: {message}"),
290            })
291        })?
292        .instance_type
293        .to_string();
294
295    let mut options: Vec<ComputeMachineOption> = instance_catalog::catalog_for_platform(platform)
296        .into_iter()
297        .filter(|spec| instance_satisfies(spec, requirements))
298        .map(|spec| ComputeMachineOption {
299            machine: spec.name.to_string(),
300            profile: spec.to_machine_profile(),
301            recommended: spec.name == recommended || Some(spec.name) == selected_machine,
302        })
303        .collect();
304    options.sort_by(|a, b| a.machine.cmp(&b.machine));
305    Ok(options)
306}
307
308fn instance_satisfies(
309    spec: &instance_catalog::InstanceTypeSpec,
310    requirements: &WorkloadRequirements,
311) -> bool {
312    if let Some(architecture) = requirements.architecture {
313        if spec.architecture != architecture {
314            return false;
315        }
316    }
317    if requirements.nested_virt && !spec.is_nested_virt_capable() {
318        return false;
319    }
320    if spec.vcpu < requirements.max_cpu_per_container.ceil() as u32 {
321        return false;
322    }
323    if spec.memory_bytes < requirements.max_memory_per_container {
324        return false;
325    }
326    if spec.ephemeral_storage_bytes < requirements.max_ephemeral_storage_bytes {
327        return false;
328    }
329    match (&requirements.gpu, spec.gpu) {
330        (Some(required), Some(actual)) => {
331            (required.gpu_type == "any" || required.gpu_type == actual.gpu_type)
332                && actual.count >= required.count
333        }
334        (Some(_), None) => false,
335        (None, _) => true,
336    }
337}
338
339#[derive(Debug, Clone)]
340struct Workload {
341    id: String,
342    cpu: f64,
343    memory_bytes: u64,
344    desired_replicas: f64,
345    max_replicas: f64,
346    ephemeral_storage_bytes: u64,
347    gpu: Option<GpuSpec>,
348}
349
350impl Workload {
351    fn from_container(container: &Container) -> Result<Self, ErrorData> {
352        let cpu = parse_cpu(&container.id, &container.cpu)?;
353        let memory_bytes = parse_memory(&container.id, &container.memory)?;
354        let desired_replicas = container
355            .autoscaling
356            .as_ref()
357            .map(|a| a.desired)
358            .or(container.replicas)
359            .unwrap_or(1) as f64;
360        let max_replicas = container
361            .autoscaling
362            .as_ref()
363            .map(|a| a.max)
364            .or(container.replicas)
365            .unwrap_or(1) as f64;
366        let ephemeral_storage_bytes = container
367            .ephemeral_storage
368            .as_deref()
369            .map(instance_catalog::parse_memory_bytes)
370            .transpose()
371            .map_err(|message| {
372                AlienError::new(ErrorData::GenericError {
373                    message: format!(
374                        "Failed to parse ephemeral storage for '{}': {message}",
375                        container.id
376                    ),
377                })
378            })?
379            .unwrap_or(0);
380
381        Ok(Self {
382            id: container.id.clone(),
383            cpu,
384            memory_bytes,
385            desired_replicas,
386            max_replicas,
387            ephemeral_storage_bytes,
388            gpu: container.gpu.as_ref().map(|gpu| GpuSpec {
389                gpu_type: gpu.gpu_type.clone(),
390                count: gpu.count,
391            }),
392        })
393    }
394
395    fn from_daemon(daemon: &Daemon) -> Result<Self, ErrorData> {
396        Ok(Self {
397            id: daemon.id.clone(),
398            cpu: parse_cpu(&daemon.id, &daemon.cpu)?,
399            memory_bytes: parse_memory(&daemon.id, &daemon.memory)?,
400            desired_replicas: 1.0,
401            max_replicas: 1.0,
402            ephemeral_storage_bytes: 0,
403            gpu: None,
404        })
405    }
406}
407
408fn parse_cpu(resource_id: &str, spec: &ResourceSpec) -> Result<f64, ErrorData> {
409    instance_catalog::parse_cpu(&spec.desired).map_err(|message| {
410        AlienError::new(ErrorData::GenericError {
411            message: format!(
412                "Failed to parse CPU requirement '{}' for '{}': {message}",
413                spec.desired, resource_id
414            ),
415        })
416    })
417}
418
419fn parse_memory(resource_id: &str, spec: &ResourceSpec) -> Result<u64, ErrorData> {
420    instance_catalog::parse_memory_bytes(&spec.desired).map_err(|message| {
421        AlienError::new(ErrorData::GenericError {
422            message: format!(
423                "Failed to parse memory requirement '{}' for '{}': {message}",
424                spec.desired, resource_id
425            ),
426        })
427    })
428}
429
430fn aggregate_workloads(workloads: &[Workload]) -> WorkloadRequirements {
431    let mut requirements = default_requirements();
432    requirements.total_cpu_at_desired = 0.0;
433    requirements.total_memory_bytes_at_desired = 0;
434    requirements.total_cpu_at_max = 0.0;
435    requirements.total_memory_bytes_at_max = 0;
436    requirements.max_cpu_per_container = 0.0;
437    requirements.max_memory_per_container = 0;
438    requirements.max_ephemeral_storage_bytes = 0;
439    requirements.gpu = None;
440
441    for workload in workloads {
442        requirements.total_cpu_at_desired += workload.cpu * workload.desired_replicas;
443        requirements.total_cpu_at_max += workload.cpu * workload.max_replicas;
444        requirements.total_memory_bytes_at_desired +=
445            (workload.memory_bytes as f64 * workload.desired_replicas) as u64;
446        requirements.total_memory_bytes_at_max +=
447            (workload.memory_bytes as f64 * workload.max_replicas) as u64;
448        requirements.max_cpu_per_container = requirements.max_cpu_per_container.max(workload.cpu);
449        requirements.max_memory_per_container = requirements
450            .max_memory_per_container
451            .max(workload.memory_bytes);
452        requirements.max_ephemeral_storage_bytes = requirements
453            .max_ephemeral_storage_bytes
454            .max(workload.ephemeral_storage_bytes);
455        if requirements.gpu.is_none() {
456            requirements.gpu = workload.gpu.clone();
457        }
458    }
459    requirements
460}
461
462fn default_requirements() -> WorkloadRequirements {
463    WorkloadRequirements {
464        total_cpu_at_desired: 1.0,
465        total_memory_bytes_at_desired: 2 * 1024 * 1024 * 1024,
466        total_cpu_at_max: 1.0,
467        total_memory_bytes_at_max: 2 * 1024 * 1024 * 1024,
468        max_cpu_per_container: 1.0,
469        max_memory_per_container: 2 * 1024 * 1024 * 1024,
470        max_ephemeral_storage_bytes: 0,
471        gpu: None,
472        architecture: None,
473        nested_virt: false,
474    }
475}
476
477fn profile_to_requirements(
478    profile: Option<&MachineProfile>,
479    nested_virt: bool,
480) -> WorkloadRequirements {
481    let Some(profile) = profile else {
482        return WorkloadRequirements {
483            nested_virt,
484            ..default_requirements()
485        };
486    };
487    let cpu = instance_catalog::parse_cpu(&profile.cpu).unwrap_or(1.0);
488    WorkloadRequirements {
489        total_cpu_at_desired: cpu,
490        total_memory_bytes_at_desired: profile.memory_bytes,
491        total_cpu_at_max: cpu,
492        total_memory_bytes_at_max: profile.memory_bytes,
493        max_cpu_per_container: cpu,
494        max_memory_per_container: profile.memory_bytes,
495        max_ephemeral_storage_bytes: profile.ephemeral_storage_bytes,
496        gpu: profile.gpu.clone(),
497        architecture: profile.architecture,
498        nested_virt,
499    }
500}
501
502fn merge_requirements(existing: &mut WorkloadRequirements, declared: &WorkloadRequirements) {
503    existing.total_cpu_at_desired = existing
504        .total_cpu_at_desired
505        .max(declared.total_cpu_at_desired);
506    existing.total_memory_bytes_at_desired = existing
507        .total_memory_bytes_at_desired
508        .max(declared.total_memory_bytes_at_desired);
509    existing.total_cpu_at_max = existing.total_cpu_at_max.max(declared.total_cpu_at_max);
510    existing.total_memory_bytes_at_max = existing
511        .total_memory_bytes_at_max
512        .max(declared.total_memory_bytes_at_max);
513    existing.max_cpu_per_container = existing
514        .max_cpu_per_container
515        .max(declared.max_cpu_per_container);
516    existing.max_memory_per_container = existing
517        .max_memory_per_container
518        .max(declared.max_memory_per_container);
519    existing.max_ephemeral_storage_bytes = existing
520        .max_ephemeral_storage_bytes
521        .max(declared.max_ephemeral_storage_bytes);
522    if existing.gpu.is_none() {
523        existing.gpu = declared.gpu.clone();
524    }
525    existing.nested_virt |= declared.nested_virt;
526    if existing.architecture.is_none() {
527        existing.architecture = declared.architecture;
528    }
529}
530
531fn requirements_to_profile(requirements: &WorkloadRequirements) -> MachineProfile {
532    MachineProfile {
533        cpu: requirements.max_cpu_per_container.to_string(),
534        memory_bytes: requirements.max_memory_per_container,
535        ephemeral_storage_bytes: requirements.max_ephemeral_storage_bytes,
536        architecture: requirements.architecture,
537        gpu: requirements.gpu.clone(),
538    }
539}
540
541fn merge_scale_policy(
542    existing: &CapacityGroupScalePolicy,
543    declared: &CapacityGroupScalePolicy,
544) -> CapacityGroupScalePolicy {
545    match (existing, declared) {
546        (
547            CapacityGroupScalePolicy::Fixed {
548                machines: existing_machines,
549            },
550            CapacityGroupScalePolicy::Fixed {
551                machines: declared_machines,
552            },
553        ) => CapacityGroupScalePolicy::Fixed {
554            machines: merge_choice_range(existing_machines, declared_machines),
555        },
556        (_, declared) => declared.clone(),
557    }
558}
559
560fn merge_choice_range(
561    existing: &ComputeChoiceRange,
562    declared: &ComputeChoiceRange,
563) -> ComputeChoiceRange {
564    ComputeChoiceRange {
565        min: existing.min.max(declared.min),
566        max: existing.max.max(declared.max),
567        default: declared.default,
568    }
569}
570
571fn validate_selection_against_scale(
572    selection: &ComputePoolSelection,
573    scale: &CapacityGroupScalePolicy,
574) -> std::result::Result<(), String> {
575    match (selection, scale) {
576        (
577            ComputePoolSelection::Fixed { machines, .. },
578            CapacityGroupScalePolicy::Fixed { machines: allowed },
579        ) => {
580            if allowed.contains(*machines) {
581                Ok(())
582            } else {
583                Err(format!(
584                    "fixed machine count {machines} is outside the allowed range {}-{}",
585                    allowed.min, allowed.max
586                ))
587            }
588        }
589        (
590            ComputePoolSelection::Autoscale { min, max, .. },
591            CapacityGroupScalePolicy::Autoscale {
592                min: allowed_min,
593                max: allowed_max,
594            },
595        ) => {
596            if !allowed_min.contains(*min) {
597                return Err(format!(
598                    "autoscale minimum {min} is outside the allowed range {}-{}",
599                    allowed_min.min, allowed_min.max
600                ));
601            }
602            if !allowed_max.contains(*max) {
603                return Err(format!(
604                    "autoscale maximum {max} is outside the allowed range {}-{}",
605                    allowed_max.min, allowed_max.max
606                ));
607            }
608            Ok(())
609        }
610        (ComputePoolSelection::Fixed { .. }, CapacityGroupScalePolicy::Autoscale { .. }) => {
611            Err("must use autoscale mode".to_string())
612        }
613        (ComputePoolSelection::Autoscale { .. }, CapacityGroupScalePolicy::Fixed { .. }) => {
614            Err("must use fixed mode".to_string())
615        }
616    }
617}
618
619fn needed_container_pool(container: &Container) -> &'static str {
620    if container.gpu.is_some() {
621        return "gpu";
622    }
623    if let Some(storage) = &container.ephemeral_storage {
624        if instance_catalog::parse_memory_bytes(storage).unwrap_or(0) > 200 * 1024 * 1024 * 1024 {
625            return "storage";
626        }
627    }
628    "general"
629}
630
631fn default_min_machines(requirements: &WorkloadRequirements) -> u32 {
632    if requirements.total_cpu_at_desired > 0.0 || requirements.total_memory_bytes_at_desired > 0 {
633        1
634    } else {
635        0
636    }
637}
638
639fn default_max_machines(requirements: &WorkloadRequirements) -> u32 {
640    let min = default_min_machines(requirements);
641    let by_cpu =
642        (requirements.total_cpu_at_max / requirements.max_cpu_per_container.max(1.0)).ceil() as u32;
643    let by_mem = requirements
644        .total_memory_bytes_at_max
645        .div_ceil(requirements.max_memory_per_container.max(1)) as u32;
646    min.max(by_cpu).max(by_mem).max(1)
647}
648
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        instance_catalog::Architecture, CapacityGroup, CapacityGroupScalePolicy,
        ComputeChoiceRange, ComputeCluster, ComputeSettings, ContainerCode, DaemonCode, Resource,
        ResourceEntry, ResourceLifecycle, Stack,
    };

    const GIB: u64 = 1024 * 1024 * 1024;

    /// Build a `ResourceSpec` from string literals.
    fn resource_spec(min: &str, desired: &str) -> ResourceSpec {
        ResourceSpec {
            min: min.to_string(),
            desired: desired.to_string(),
        }
    }

    /// Wrap a resource in an entry with no dependencies and no remote access.
    fn resource_entry(config: Resource, lifecycle: ResourceLifecycle) -> ResourceEntry {
        ResourceEntry {
            config,
            lifecycle,
            dependencies: Vec::new(),
            remote_access: false,
        }
    }

    /// Build a stack with default permissions from named resource entries.
    fn stack_of(id: &str, resources: Vec<(String, ResourceEntry)>) -> Stack {
        Stack {
            id: id.to_string(),
            resources: resources.into_iter().collect(),
            permissions: crate::permissions::PermissionsConfig::default(),
            supported_platforms: None,
            inputs: vec![],
        }
    }

    /// Settings that override only the `general` pool.
    fn general_pool_settings(selection: ComputePoolSelection) -> ComputeSettings {
        ComputeSettings {
            pools: [("general".to_string(), selection)].into_iter().collect(),
        }
    }

    /// The 4 CPU / 16 GiB / 20 GiB-storage profile shared by the capacity-group tests.
    fn four_cpu_profile(architecture: Option<Architecture>) -> MachineProfile {
        MachineProfile {
            cpu: "4".to_string(),
            memory_bytes: 16 * GIB,
            ephemeral_storage_bytes: 20 * GIB,
            architecture,
            gpu: None,
        }
    }

    /// A stack holding one `api` container that wants 2 CPU and 4Gi.
    fn stack_with_container() -> Stack {
        let container = Container::new("api".to_string())
            .code(ContainerCode::Image {
                image: "api:latest".to_string(),
            })
            .cpu(resource_spec("1", "2"))
            .memory(resource_spec("2Gi", "4Gi"))
            .permissions("api".to_string())
            .build();
        stack_of(
            "test",
            vec![(
                "api".to_string(),
                resource_entry(Resource::new(container), ResourceLifecycle::Live),
            )],
        )
    }

    #[test]
    fn cloud_plan_recommends_provider_machine_without_mutating_selection() {
        let stack = stack_with_container();

        let plan = plan_compute(&stack, Platform::Aws, None).expect("plan should build");

        let pool = plan.pools.first().expect("general pool should exist");
        assert_eq!(pool.pool_id, "general");
        assert_eq!(pool.workloads, vec!["api"]);
        assert!(pool.selected.machine().is_some());
        assert!(pool.machines.iter().any(|machine| machine.recommended));
    }

    #[test]
    fn selected_machine_is_preserved_as_static_deployment_choice() {
        let stack = stack_with_container();
        let settings = general_pool_settings(ComputePoolSelection::Fixed {
            machines: 1,
            machine: Some("m7g.xlarge".to_string()),
        });

        let plan = plan_compute(&stack, Platform::Aws, Some(&settings)).expect("plan should build");

        let pool = plan.pools.first().expect("general pool should exist");
        assert_eq!(pool.selected.machine(), Some("m7g.xlarge"));
        assert!(pool.errors.is_empty());
    }

    #[test]
    fn explicit_capacity_group_requirements_are_merged_with_workloads() {
        let mut stack = stack_with_container();
        let cluster = ComputeCluster::new("compute".to_string())
            .capacity_group(CapacityGroup {
                group_id: "general".to_string(),
                instance_type: None,
                profile: Some(four_cpu_profile(None)),
                min_size: 2,
                max_size: 5,
                scale_policy: None,
                nested_virtualization: Some(true),
            })
            .build();
        stack.resources.insert(
            "compute".to_string(),
            resource_entry(Resource::new(cluster), ResourceLifecycle::Frozen),
        );

        let plan = plan_compute(&stack, Platform::Aws, None).expect("plan should build");

        let pool = plan.pools.first().expect("general pool should exist");
        let machine = pool
            .selected
            .machine()
            .expect("AWS selection should include a machine");
        let spec = instance_catalog::find_instance_type(Platform::Aws, machine)
            .expect("selected machine should exist in the catalog");
        assert!(spec.is_nested_virt_capable());
        assert_eq!(pool.selected.min_size(), 2);
        assert_eq!(pool.selected.max_size(), 5);
        assert!(pool.errors.is_empty());
    }

    #[test]
    fn nested_x86_fixed_range_pool_preserves_bounds_and_rejects_graviton() {
        let declared_range = ComputeChoiceRange {
            min: 1,
            max: 5,
            default: 2,
        };

        let daemon = Daemon::new("bear-agent-loader".to_string())
            .code(DaemonCode::Image {
                image: "example.com/bear:latest".to_string(),
            })
            .cluster("bear-runtime".to_string())
            .cpu(resource_spec("2", "2"))
            .memory(resource_spec("4Gi", "4Gi"))
            .permissions("loader".to_string())
            .build();
        let cluster = ComputeCluster::new("bear-runtime".to_string())
            .capacity_group(CapacityGroup {
                group_id: "general".to_string(),
                instance_type: None,
                profile: Some(four_cpu_profile(Some(Architecture::X86_64))),
                min_size: 2,
                max_size: 2,
                scale_policy: Some(CapacityGroupScalePolicy::Fixed {
                    machines: declared_range.clone(),
                }),
                nested_virtualization: Some(true),
            })
            .build();
        let stack = stack_of(
            "bear",
            vec![
                (
                    "bear-agent-loader".to_string(),
                    resource_entry(Resource::new(daemon), ResourceLifecycle::Live),
                ),
                (
                    "bear-runtime".to_string(),
                    resource_entry(Resource::new(cluster), ResourceLifecycle::Frozen),
                ),
            ],
        );

        // Without settings: the x86 recommendation applies and the declared range survives.
        let plan = plan_compute(&stack, Platform::Aws, None).expect("plan should build");
        let pool = plan.pools.first().expect("general pool should exist");
        assert_eq!(pool.recommended.machine(), Some("m8i.2xlarge"));
        assert_eq!(pool.recommended.min_size(), 2);
        assert_eq!(pool.recommended.max_size(), 2);
        assert_eq!(
            pool.scale,
            CapacityGroupScalePolicy::Fixed {
                machines: declared_range,
            }
        );
        assert!(!pool
            .machines
            .iter()
            .any(|option| option.machine == "m7g.2xlarge"));

        // Selecting a Graviton machine for the x86 pool must surface an error.
        let invalid_settings = general_pool_settings(ComputePoolSelection::Fixed {
            machines: 2,
            machine: Some("m7g.2xlarge".to_string()),
        });
        let invalid_plan = plan_compute(&stack, Platform::Aws, Some(&invalid_settings))
            .expect("plan should build");
        assert!(!invalid_plan.pools[0].errors.is_empty());
    }

    #[test]
    fn local_plan_has_no_provider_machine_choices() {
        let stack = stack_with_container();

        let plan = plan_compute(&stack, Platform::Local, None).expect("plan should build");

        let pool = plan.pools.first().expect("general pool should exist");
        assert_eq!(pool.selected.machine(), None);
        assert!(pool.machines.is_empty());
        assert!(pool.errors.is_empty());
    }
}
alien_core/compute_planner.rs

alien_core/
compute_planner.rs