Skip to main content

infernum_arbiter/
allocation.rs

//! Allocation types and requests.
//!
//! Represents GPU memory allocations, the requests that ask for them, and the
//! result of an allocation attempt.
4
5use crate::priority::{Priority, WorkloadType};
6use serde::{Deserialize, Serialize};
7use std::time::Instant;
8
9/// A GPU memory allocation.
10#[derive(Debug, Clone)]
11pub struct Allocation {
12    /// Unique allocation ID.
13    pub id: String,
14    /// Type of workload.
15    pub workload_type: WorkloadType,
16    /// Priority level.
17    pub priority: Priority,
18    /// Memory allocated in bytes.
19    pub memory_allocated: u64,
20    /// Quality target (0.0 - 1.0).
21    pub quality_target: f32,
22    /// When this allocation was created.
23    pub created_at: Instant,
24}
25
26impl Allocation {
27    /// Returns how long this allocation has been active.
28    pub fn age(&self) -> std::time::Duration {
29        self.created_at.elapsed()
30    }
31
32    /// Returns whether this is an LLM workload.
33    pub fn is_llm(&self) -> bool {
34        matches!(self.workload_type, WorkloadType::LlmInference)
35    }
36
37    /// Returns whether this is a diffusion workload.
38    pub fn is_diffusion(&self) -> bool {
39        matches!(
40            self.workload_type,
41            WorkloadType::ImageGeneration | WorkloadType::VideoGeneration
42        )
43    }
44
45    /// Returns memory in megabytes.
46    pub fn memory_mb(&self) -> u64 {
47        self.memory_allocated / (1024 * 1024)
48    }
49}
50
51/// A request for GPU allocation.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct AllocationRequest {
54    /// Type of workload.
55    pub workload_type: WorkloadType,
56    /// Priority level.
57    pub priority: Priority,
58    /// Memory required in bytes.
59    pub memory_required: u64,
60    /// Minimum acceptable quality.
61    pub min_quality: Option<f32>,
62    /// Whether to wait for resources or fail immediately.
63    pub wait_for_resources: bool,
64    /// Maximum time to wait if waiting.
65    pub timeout_ms: Option<u64>,
66    /// Metadata for tracking.
67    pub metadata: Option<String>,
68}
69
70impl AllocationRequest {
71    /// Creates a new request for LLM inference.
72    pub fn llm(memory_required: u64) -> Self {
73        Self {
74            workload_type: WorkloadType::LlmInference,
75            priority: Priority::Normal,
76            memory_required,
77            min_quality: None,
78            wait_for_resources: true,
79            timeout_ms: Some(30_000),
80            metadata: None,
81        }
82    }
83
84    /// Creates a new request for image generation.
85    pub fn image(memory_required: u64) -> Self {
86        Self {
87            workload_type: WorkloadType::ImageGeneration,
88            priority: Priority::Normal,
89            memory_required,
90            min_quality: None,
91            wait_for_resources: true,
92            timeout_ms: Some(60_000),
93            metadata: None,
94        }
95    }
96
97    /// Creates a new request for video generation.
98    pub fn video(memory_required: u64) -> Self {
99        Self {
100            workload_type: WorkloadType::VideoGeneration,
101            priority: Priority::Normal,
102            memory_required,
103            min_quality: None,
104            wait_for_resources: true,
105            timeout_ms: Some(120_000),
106            metadata: None,
107        }
108    }
109
110    /// Sets priority.
111    pub fn with_priority(mut self, priority: Priority) -> Self {
112        self.priority = priority;
113        self
114    }
115
116    /// Sets minimum quality.
117    pub fn with_min_quality(mut self, min_quality: f32) -> Self {
118        self.min_quality = Some(min_quality.clamp(0.0, 1.0));
119        self
120    }
121
122    /// Sets to fail immediately if resources unavailable.
123    pub fn no_wait(mut self) -> Self {
124        self.wait_for_resources = false;
125        self.timeout_ms = None;
126        self
127    }
128
129    /// Sets timeout.
130    pub fn with_timeout(mut self, timeout_ms: u64) -> Self {
131        self.timeout_ms = Some(timeout_ms);
132        self
133    }
134
135    /// Sets metadata.
136    pub fn with_metadata(mut self, metadata: impl Into<String>) -> Self {
137        self.metadata = Some(metadata.into());
138        self
139    }
140
141    /// Returns effective minimum quality.
142    pub fn effective_min_quality(&self) -> f32 {
143        self.min_quality
144            .unwrap_or_else(|| self.workload_type.min_quality())
145    }
146}
147
148/// Result of an allocation attempt.
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub enum AllocationResult {
151    /// Allocation succeeded.
152    Success {
153        /// Allocated quality target.
154        quality: f32,
155        /// Memory allocated in bytes.
156        memory: u64,
157    },
158    /// Insufficient memory.
159    InsufficientMemory {
160        /// Memory requested.
161        requested: u64,
162        /// Memory available.
163        available: u64,
164    },
165    /// Quality requirements cannot be met.
166    InsufficientQuality {
167        /// Minimum quality requested.
168        requested: f32,
169        /// Maximum quality achievable.
170        achievable: f32,
171    },
172    /// Timed out waiting.
173    Timeout {
174        /// How long we waited.
175        waited_ms: u64,
176    },
177    /// Request was preempted by higher priority.
178    Preempted,
179}
180
181impl AllocationResult {
182    /// Returns whether allocation succeeded.
183    pub fn is_success(&self) -> bool {
184        matches!(self, Self::Success { .. })
185    }
186
187    /// Returns quality if successful.
188    pub fn quality(&self) -> Option<f32> {
189        match self {
190            Self::Success { quality, .. } => Some(*quality),
191            _ => None,
192        }
193    }
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// An allocation's age must be at least as long as the time slept since
    /// it was created.
    #[test]
    fn test_allocation_age() {
        let pause = Duration::from_millis(10);
        let allocation = Allocation {
            id: String::from("test"),
            workload_type: WorkloadType::LlmInference,
            priority: Priority::Normal,
            memory_allocated: 1024,
            quality_target: 1.0,
            created_at: Instant::now(),
        };

        std::thread::sleep(pause);
        assert!(allocation.age() >= pause);
    }

    /// Chained builder calls override only the fields they name.
    #[test]
    fn test_request_builder() {
        let one_gib: u64 = 1024 * 1024 * 1024;
        let request = AllocationRequest::llm(one_gib)
            .with_priority(Priority::High)
            .with_min_quality(0.8)
            .with_metadata("test inference");

        assert!(matches!(request.workload_type, WorkloadType::LlmInference));
        assert!(matches!(request.priority, Priority::High));
        assert_eq!(request.min_quality, Some(0.8));
        assert!(request.wait_for_resources);
    }

    /// Without an explicit minimum the workload default applies; an explicit
    /// minimum takes precedence.
    #[test]
    fn test_effective_min_quality() {
        let tolerance = 0.001;

        let default_request = AllocationRequest::llm(1024);
        // Should use workload default
        assert!((default_request.effective_min_quality() - 0.4).abs() < tolerance);

        let explicit_request = default_request.with_min_quality(0.6);
        assert!((explicit_request.effective_min_quality() - 0.6).abs() < tolerance);
    }

    /// `is_success` and `quality` agree for a success and a failure variant.
    #[test]
    fn test_allocation_result() {
        let granted = AllocationResult::Success {
            quality: 0.9,
            memory: 1024,
        };
        assert!(granted.is_success());
        assert_eq!(granted.quality(), Some(0.9));

        let denied = AllocationResult::InsufficientMemory {
            requested: 1000,
            available: 500,
        };
        assert!(!denied.is_success());
        assert_eq!(denied.quality(), None);
    }
}
254}