// infernum_arbiter/allocation.rs
use crate::priority::{Priority, WorkloadType};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use std::time::Instant;

9#[derive(Debug, Clone)]
11pub struct Allocation {
12 pub id: String,
14 pub workload_type: WorkloadType,
16 pub priority: Priority,
18 pub memory_allocated: u64,
20 pub quality_target: f32,
22 pub created_at: Instant,
24}
25
26impl Allocation {
27 pub fn age(&self) -> std::time::Duration {
29 self.created_at.elapsed()
30 }
31
32 pub fn is_llm(&self) -> bool {
34 matches!(self.workload_type, WorkloadType::LlmInference)
35 }
36
37 pub fn is_diffusion(&self) -> bool {
39 matches!(
40 self.workload_type,
41 WorkloadType::ImageGeneration | WorkloadType::VideoGeneration
42 )
43 }
44
45 pub fn memory_mb(&self) -> u64 {
47 self.memory_allocated / (1024 * 1024)
48 }
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct AllocationRequest {
54 pub workload_type: WorkloadType,
56 pub priority: Priority,
58 pub memory_required: u64,
60 pub min_quality: Option<f32>,
62 pub wait_for_resources: bool,
64 pub timeout_ms: Option<u64>,
66 pub metadata: Option<String>,
68}
69
70impl AllocationRequest {
71 pub fn llm(memory_required: u64) -> Self {
73 Self {
74 workload_type: WorkloadType::LlmInference,
75 priority: Priority::Normal,
76 memory_required,
77 min_quality: None,
78 wait_for_resources: true,
79 timeout_ms: Some(30_000),
80 metadata: None,
81 }
82 }
83
84 pub fn image(memory_required: u64) -> Self {
86 Self {
87 workload_type: WorkloadType::ImageGeneration,
88 priority: Priority::Normal,
89 memory_required,
90 min_quality: None,
91 wait_for_resources: true,
92 timeout_ms: Some(60_000),
93 metadata: None,
94 }
95 }
96
97 pub fn video(memory_required: u64) -> Self {
99 Self {
100 workload_type: WorkloadType::VideoGeneration,
101 priority: Priority::Normal,
102 memory_required,
103 min_quality: None,
104 wait_for_resources: true,
105 timeout_ms: Some(120_000),
106 metadata: None,
107 }
108 }
109
110 pub fn with_priority(mut self, priority: Priority) -> Self {
112 self.priority = priority;
113 self
114 }
115
116 pub fn with_min_quality(mut self, min_quality: f32) -> Self {
118 self.min_quality = Some(min_quality.clamp(0.0, 1.0));
119 self
120 }
121
122 pub fn no_wait(mut self) -> Self {
124 self.wait_for_resources = false;
125 self.timeout_ms = None;
126 self
127 }
128
129 pub fn with_timeout(mut self, timeout_ms: u64) -> Self {
131 self.timeout_ms = Some(timeout_ms);
132 self
133 }
134
135 pub fn with_metadata(mut self, metadata: impl Into<String>) -> Self {
137 self.metadata = Some(metadata.into());
138 self
139 }
140
141 pub fn effective_min_quality(&self) -> f32 {
143 self.min_quality
144 .unwrap_or_else(|| self.workload_type.min_quality())
145 }
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize)]
150pub enum AllocationResult {
151 Success {
153 quality: f32,
155 memory: u64,
157 },
158 InsufficientMemory {
160 requested: u64,
162 available: u64,
164 },
165 InsufficientQuality {
167 requested: f32,
169 achievable: f32,
171 },
172 Timeout {
174 waited_ms: u64,
176 },
177 Preempted,
179}
180
181impl AllocationResult {
182 pub fn is_success(&self) -> bool {
184 matches!(self, Self::Success { .. })
185 }
186
187 pub fn quality(&self) -> Option<f32> {
189 match self {
190 Self::Success { quality, .. } => Some(*quality),
191 _ => None,
192 }
193 }
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an allocation with the given workload type and memory size;
    /// the remaining fields use fixed placeholder values.
    fn sample_allocation(workload_type: WorkloadType, memory_allocated: u64) -> Allocation {
        Allocation {
            id: "test".to_string(),
            workload_type,
            priority: Priority::Normal,
            memory_allocated,
            quality_target: 1.0,
            created_at: Instant::now(),
        }
    }

    #[test]
    fn test_allocation_age() {
        let alloc = sample_allocation(WorkloadType::LlmInference, 1024);

        // `sleep` blocks for at least the requested duration, so the age
        // measured afterwards cannot be shorter than that.
        std::thread::sleep(std::time::Duration::from_millis(10));
        assert!(alloc.age().as_millis() >= 10);
    }

    #[test]
    fn test_workload_predicates() {
        let llm = sample_allocation(WorkloadType::LlmInference, 0);
        assert!(llm.is_llm());
        assert!(!llm.is_diffusion());

        let image = sample_allocation(WorkloadType::ImageGeneration, 0);
        assert!(image.is_diffusion());
        assert!(!image.is_llm());

        let video = sample_allocation(WorkloadType::VideoGeneration, 0);
        assert!(video.is_diffusion());
        assert!(!video.is_llm());
    }

    #[test]
    fn test_memory_mb_truncates() {
        assert_eq!(sample_allocation(WorkloadType::LlmInference, 1024).memory_mb(), 0);
        assert_eq!(
            sample_allocation(WorkloadType::LlmInference, 3 * 1024 * 1024 + 1).memory_mb(),
            3
        );
    }

    #[test]
    fn test_request_builder() {
        let req = AllocationRequest::llm(1024 * 1024 * 1024)
            .with_priority(Priority::High)
            .with_min_quality(0.8)
            .with_metadata("test inference");

        assert!(matches!(req.workload_type, WorkloadType::LlmInference));
        assert!(matches!(req.priority, Priority::High));
        assert_eq!(req.min_quality, Some(0.8));
        assert!(req.wait_for_resources);
        assert_eq!(req.metadata.as_deref(), Some("test inference"));
    }

    #[test]
    fn test_constructor_timeouts() {
        assert_eq!(AllocationRequest::llm(1).timeout_ms, Some(30_000));
        assert_eq!(AllocationRequest::image(1).timeout_ms, Some(60_000));
        assert_eq!(AllocationRequest::video(1).timeout_ms, Some(120_000));
    }

    #[test]
    fn test_no_wait_and_timeout() {
        let req = AllocationRequest::image(1024).no_wait();
        assert!(!req.wait_for_resources);
        assert_eq!(req.timeout_ms, None);

        let req = AllocationRequest::image(1024).with_timeout(5);
        assert!(req.wait_for_resources);
        assert_eq!(req.timeout_ms, Some(5));
    }

    #[test]
    fn test_min_quality_is_clamped() {
        assert_eq!(AllocationRequest::llm(1).with_min_quality(1.5).min_quality, Some(1.0));
        assert_eq!(AllocationRequest::llm(1).with_min_quality(-0.5).min_quality, Some(0.0));
    }

    #[test]
    fn test_effective_min_quality() {
        let req = AllocationRequest::llm(1024);
        assert!((req.effective_min_quality() - 0.4).abs() < 0.001);

        let req_with_min = req.with_min_quality(0.6);
        assert!((req_with_min.effective_min_quality() - 0.6).abs() < 0.001);
    }

    #[test]
    fn test_allocation_result() {
        let success = AllocationResult::Success {
            quality: 0.9,
            memory: 1024,
        };
        assert!(success.is_success());
        assert_eq!(success.quality(), Some(0.9));

        let failure = AllocationResult::InsufficientMemory {
            requested: 1000,
            available: 500,
        };
        assert!(!failure.is_success());
        assert_eq!(failure.quality(), None);

        assert_eq!(AllocationResult::Preempted.quality(), None);
        assert!(!AllocationResult::Timeout { waited_ms: 1 }.is_success());
    }
}