1use super::cluster::{ClusterConfig, NodeConfig};
15
/// Returns the relative compute-throughput factor for a GPU model name.
///
/// The lookup is case-insensitive (the name is lower-cased before comparison).
/// Factors are expressed relative to an RTX 4090, which maps to `1.0`. Any name
/// that is not in the table falls back to `0.5`.
fn gpu_flops_factor(gpu_type: &str) -> f64 {
    /// Accepted spellings of each known model, paired with its factor.
    const KNOWN_MODELS: &[(&[&str], f64)] = &[
        (&["rtx-4090", "rtx4090", "geforce-rtx-4090"], 1.0),
        (&["rtx-4080", "rtx4080"], 0.72),
        (&["rtx-3090", "rtx3090"], 0.55),
        (&["rtx-3080", "rtx3080"], 0.45),
        (&["a100", "a100-80gb", "a100-40gb"], 1.2),
        (&["h100", "h100-80gb"], 2.0),
        (&["jetson-orin", "orin"], 0.06),
        (&["jetson-nano", "nano"], 0.02),
    ];
    /// Factor assumed for models missing from `KNOWN_MODELS`.
    const UNKNOWN_MODEL_FACTOR: f64 = 0.5;

    let wanted = gpu_type.to_lowercase();
    KNOWN_MODELS
        .iter()
        .find(|(aliases, _)| aliases.iter().any(|alias| *alias == wanted))
        .map_or(UNKNOWN_MODEL_FACTOR, |&(_, factor)| factor)
}
30
/// A single adapter that needs to be placed on a cluster node.
///
/// Derives `PartialEq`/`Eq` so jobs can be compared directly (for example in
/// assertions) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdapterJob {
    /// Caller-assigned index of the adapter; copied verbatim into the
    /// resulting `PlacementDecision`.
    pub adapter_idx: usize,
    /// VRAM the adapter needs, in MB. A node must have at least this much free
    /// VRAM to be eligible, and the amount is reserved on the chosen node.
    pub budget_mb: u64,
    /// Free-form label for the job. The placement code in this file does not
    /// read it.
    pub label: String,
}
41
/// The outcome of placing one adapter job on a node.
///
/// Derives `PartialEq` so decisions can be compared directly. `Eq` is not
/// derived because `score` is a floating-point value.
#[derive(Debug, Clone, PartialEq)]
pub struct PlacementDecision {
    /// Index of the placed adapter, taken from `AdapterJob::adapter_idx`.
    pub adapter_idx: usize,
    /// Name of the node the adapter was assigned to.
    pub node_name: String,
    /// Score the chosen node received for this job at the time of placement.
    pub score: f64,
}
52
/// Running load of a single node while placements are being computed.
///
/// The default value describes an idle node (no adapters, nothing reserved).
/// Derives `PartialEq`/`Eq` so loads can be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct NodeLoad {
    /// Number of adapters currently assigned to the node; compared against the
    /// node's `max_adapters` limit.
    pub active_adapters: usize,
    /// VRAM already reserved on the node, in MB; subtracted from the node's
    /// usable VRAM to obtain the free amount.
    pub reserved_vram_mb: u64,
}
61
62pub fn place_adapters(
70 cluster: &ClusterConfig,
71 jobs: &[AdapterJob],
72 initial_load: &[NodeLoad],
73) -> Vec<PlacementDecision> {
74 let mut loads: Vec<NodeLoad> = cluster
75 .nodes
76 .iter()
77 .enumerate()
78 .map(|(i, _)| initial_load.get(i).cloned().unwrap_or_default())
79 .collect();
80
81 let mut placements = Vec::new();
82
83 for job in jobs {
84 let best = find_best_node(cluster, job, &loads);
85 if let Some((node_idx, score)) = best {
86 let node = &cluster.nodes[node_idx];
87 placements.push(PlacementDecision {
88 adapter_idx: job.adapter_idx,
89 node_name: node.name.clone(),
90 score,
91 });
92 loads[node_idx].active_adapters += 1;
93 loads[node_idx].reserved_vram_mb += job.budget_mb;
94 }
95 }
96
97 placements
98}
99
100fn find_best_node(
101 cluster: &ClusterConfig,
102 job: &AdapterJob,
103 loads: &[NodeLoad],
104) -> Option<(usize, f64)> {
105 let mut best: Option<(usize, f64)> = None;
106
107 for (i, node) in cluster.nodes.iter().enumerate() {
108 let load = &loads[i];
109
110 if load.active_adapters >= node.max_adapters {
112 continue;
113 }
114
115 let score = score_node(node, job.budget_mb, load);
116 if score <= 0.0 {
117 continue;
118 }
119
120 match best {
121 None => best = Some((i, score)),
122 Some((_, best_score)) if score > best_score => best = Some((i, score)),
123 _ => {}
124 }
125 }
126
127 best
128}
129
130pub fn score_node(node: &NodeConfig, budget_mb: u64, load: &NodeLoad) -> f64 {
136 if budget_mb == 0 {
137 return 0.0;
138 }
139
140 let free_vram = free_vram_mb(node, load);
141 if free_vram < budget_mb {
142 return 0.0;
143 }
144
145 let vram_ratio = free_vram as f64 / budget_mb as f64;
146 let flops = node_flops_factor(node);
147 let load_factor = 1.0 / (1.0 + load.active_adapters as f64);
148
149 vram_ratio * flops * load_factor
150}
151
152fn free_vram_mb(node: &NodeConfig, load: &NodeLoad) -> u64 {
154 let usable = node.usable_vram_mb();
155 usable.saturating_sub(load.reserved_vram_mb)
156}
157
158fn node_flops_factor(node: &NodeConfig) -> f64 {
160 if node.gpus.is_empty() {
161 return 0.01; }
163 node.gpus.iter().map(|g| gpu_flops_factor(&g.gpu_type)).fold(0.0_f64, f64::max)
164}
165
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]
    use super::*;
    use crate::gpu::cluster::ClusterConfig;

    /// Fixture cluster: an RTX 4090 desktop (3 adapter slots), a Jetson Orin
    /// (1 slot) and a GPU-less box (1 slot).
    const FIXTURE_YAML: &str = r"
nodes:
  - name: desktop
    host: localhost
    gpus:
      - uuid: GPU-abcd-1234
        type: rtx-4090
        vram_mb: 24564
        memory_type: discrete
    max_adapters: 3
  - name: jetson
    host: jetson.local
    transport: ssh
    gpus:
      - uuid: GPU-efgh-5678
        type: jetson-orin
        vram_mb: 8192
        memory_type: unified
    max_adapters: 1
  - name: intel-box
    host: 10.0.0.5
    transport: ssh
    user: noah
    gpus: []
    cpu_cores: 16
    ram_mb: 65536
    max_adapters: 1
";

    /// Parses the shared fixture cluster.
    fn test_cluster() -> ClusterConfig {
        ClusterConfig::from_yaml(FIXTURE_YAML).unwrap()
    }

    /// True when `actual` is within `f64::EPSILON` of `expected`.
    fn close_to(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    /// Builds `count` jobs with indices `0..count`, each asking for `budget_mb`.
    fn uniform_jobs(count: usize, budget_mb: u64) -> Vec<AdapterJob> {
        (0..count)
            .map(|i| AdapterJob { adapter_idx: i, budget_mb, label: format!("adapter-{i}") })
            .collect()
    }

    /// Counts how many placements landed on the node called `node`.
    fn placed_on(placements: &[PlacementDecision], node: &str) -> usize {
        placements.iter().filter(|p| p.node_name == node).count()
    }

    #[test]
    fn test_gpu_flops_known_types() {
        let cases = [("rtx-4090", 1.0), ("jetson-orin", 0.06), ("h100", 2.0), ("unknown-gpu", 0.5)];
        for (gpu_type, expected) in cases {
            assert!(close_to(gpu_flops_factor(gpu_type), expected));
        }
    }

    #[test]
    fn test_score_node_desktop() {
        let cluster = test_cluster();
        let idle = NodeLoad::default();
        let score = score_node(&cluster.nodes[0], 8000, &idle);
        // An idle desktop asked for 8000 MB is expected to score between 2.5 and 2.7.
        assert!(score > 2.5);
        assert!(score < 2.7);
    }

    #[test]
    fn test_score_node_insufficient_vram() {
        let cluster = test_cluster();
        let idle = NodeLoad::default();
        // 25000 MB is more than the desktop's 24564 MB card can offer.
        let score = score_node(&cluster.nodes[0], 25000, &idle);
        assert!(close_to(score, 0.0));
    }

    #[test]
    fn test_score_node_with_load() {
        let cluster = test_cluster();
        let busy = NodeLoad { active_adapters: 1, reserved_vram_mb: 8000 };
        let score = score_node(&cluster.nodes[0], 8000, &busy);
        // One adapter already running and 8000 MB reserved lowers the score.
        assert!(score > 0.7);
        assert!(score < 0.9);
    }

    #[test]
    fn test_score_cpu_only_node() {
        let cluster = test_cluster();
        let idle = NodeLoad::default();
        // The GPU-less node must not be able to host an 8000 MB adapter.
        let score = score_node(&cluster.nodes[2], 8000, &idle);
        assert!(close_to(score, 0.0));
    }

    #[test]
    fn test_place_single_adapter() {
        let cluster = test_cluster();
        let jobs =
            vec![AdapterJob { adapter_idx: 0, budget_mb: 8000, label: "adapter-0".to_string() }];
        let placements = place_adapters(&cluster, &jobs, &[]);

        assert_eq!(placements.len(), 1);
        let only = &placements[0];
        assert_eq!(only.node_name, "desktop");
        assert_eq!(only.adapter_idx, 0);
    }

    #[test]
    fn test_place_multiple_adapters_greedy() {
        let cluster = test_cluster();
        let jobs = uniform_jobs(4, 6000);
        let placements = place_adapters(&cluster, &jobs, &[]);

        // Only three of the four jobs fit, and all of them go to the desktop.
        assert_eq!(placements.len(), 3);
        for placement in &placements {
            assert_eq!(placement.node_name, "desktop");
        }
    }

    #[test]
    fn test_place_small_adapters_across_nodes() {
        let cluster = test_cluster();
        let jobs = uniform_jobs(4, 2000);
        let placements = place_adapters(&cluster, &jobs, &[]);

        // The desktop fills its three slots; the remaining job goes to the Jetson.
        assert_eq!(placements.len(), 4);
        assert_eq!(placed_on(&placements, "desktop"), 3);
        assert_eq!(placed_on(&placements, "jetson"), 1);
    }

    #[test]
    fn test_place_no_capacity() {
        let cluster = test_cluster();
        let jobs =
            vec![AdapterJob { adapter_idx: 0, budget_mb: 30000, label: "too-big".to_string() }];
        let placements = place_adapters(&cluster, &jobs, &[]);
        assert!(placements.is_empty());
    }

    #[test]
    fn test_place_with_initial_load() {
        let cluster = test_cluster();
        let jobs =
            vec![AdapterJob { adapter_idx: 0, budget_mb: 6000, label: "adapter-0".to_string() }];
        // The desktop starts with every adapter slot taken; the other nodes are idle.
        let initial = vec![
            NodeLoad { active_adapters: 3, reserved_vram_mb: 18000 },
            NodeLoad::default(),
            NodeLoad::default(),
        ];
        let placements = place_adapters(&cluster, &jobs, &initial);
        assert!(placements.is_empty());
    }

    #[test]
    fn test_node_flops_factor_multi_gpu() {
        use super::super::cluster::{GpuConfig, MemoryType, Transport};

        let slower = GpuConfig {
            uuid: "GPU-1".to_string(),
            gpu_type: "rtx-3080".to_string(),
            vram_mb: 10240,
            memory_type: MemoryType::Discrete,
        };
        let faster = GpuConfig {
            uuid: "GPU-2".to_string(),
            gpu_type: "rtx-4090".to_string(),
            vram_mb: 24564,
            memory_type: MemoryType::Discrete,
        };
        let node = NodeConfig {
            name: "multi".to_string(),
            host: "localhost".to_string(),
            transport: Transport::Local,
            user: None,
            gpus: vec![slower, faster],
            max_adapters: 4,
            cpu_cores: None,
            ram_mb: None,
        };

        // The node is rated by its fastest GPU (the RTX 4090, factor 1.0).
        assert!(close_to(node_flops_factor(&node), 1.0));
    }
}