Skip to main content

infernum_arbiter/
priority.rs

//! Priority and workload type definitions.
//!
//! Defines the priority levels and workload categories for GPU scheduling.
4
5use serde::{Deserialize, Serialize};
6
7/// Priority levels for workload scheduling.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
9pub enum Priority {
10    /// Background tasks - yield to everything.
11    Background = 0,
12    /// Low priority - nice to have.
13    Low = 1,
14    /// Normal priority - default.
15    Normal = 2,
16    /// High priority - user-facing work.
17    High = 3,
18    /// Critical - must complete, preempts others.
19    Critical = 4,
20}
21
22impl Default for Priority {
23    fn default() -> Self {
24        Self::Normal
25    }
26}
27
28impl Priority {
29    /// Returns whether this priority can preempt another.
30    pub fn can_preempt(self, other: Self) -> bool {
31        self as u8 > other as u8 + 1
32    }
33
34    /// Returns the quality multiplier for this priority.
35    ///
36    /// Higher priority workloads get better quality allocations.
37    pub fn quality_multiplier(self) -> f32 {
38        match self {
39            Self::Background => 0.6,
40            Self::Low => 0.8,
41            Self::Normal => 1.0,
42            Self::High => 1.1,
43            Self::Critical => 1.2,
44        }
45    }
46
47    /// Returns the timeout multiplier for this priority.
48    ///
49    /// Higher priority workloads get more time.
50    pub fn timeout_multiplier(self) -> f32 {
51        match self {
52            Self::Background => 0.5,
53            Self::Low => 0.75,
54            Self::Normal => 1.0,
55            Self::High => 1.5,
56            Self::Critical => 2.0,
57        }
58    }
59}
60
61/// Types of workloads the Arbiter manages.
62#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
63pub enum WorkloadType {
64    /// LLM inference (Infernum).
65    LlmInference,
66    /// Image generation (Dantalion diffusion).
67    ImageGeneration,
68    /// Video generation (Dantalion video diffusion).
69    VideoGeneration,
70}
71
72impl WorkloadType {
73    /// Returns the minimum quality threshold for this workload.
74    ///
75    /// Below this, results are too degraded to be useful.
76    pub fn min_quality(self) -> f32 {
77        match self {
78            Self::LlmInference => 0.4,     // LLM needs more precision
79            Self::ImageGeneration => 0.3,  // Images more tolerant
80            Self::VideoGeneration => 0.25, // Video very tolerant at high timesteps
81        }
82    }
83
84    /// Returns the target quality for this workload at full resources.
85    pub fn target_quality(self) -> f32 {
86        match self {
87            Self::LlmInference => 1.0,
88            Self::ImageGeneration => 1.0,
89            Self::VideoGeneration => 1.0,
90        }
91    }
92
93    /// Returns typical memory usage estimate in bytes.
94    pub fn typical_memory_mb(self) -> u64 {
95        match self {
96            Self::LlmInference => 8 * 1024,     // 8GB typical for inference
97            Self::ImageGeneration => 6 * 1024,  // 6GB for SDXL
98            Self::VideoGeneration => 12 * 1024, // 12GB for video
99        }
100    }
101
102    /// Returns whether this workload type is streaming (continuous output).
103    pub fn is_streaming(self) -> bool {
104        match self {
105            Self::LlmInference => true,     // Token by token
106            Self::ImageGeneration => false, // Single output
107            Self::VideoGeneration => true,  // Frame by frame
108        }
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Each priority level compares strictly greater than the one below it.
    #[test]
    fn test_priority_ordering() {
        let ascending = [
            Priority::Background,
            Priority::Low,
            Priority::Normal,
            Priority::High,
            Priority::Critical,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[1] > pair[0]);
        }
    }

    /// Preemption needs a gap of at least two levels.
    #[test]
    fn test_preemption() {
        let critical = Priority::Critical;
        assert!(critical.can_preempt(Priority::Normal));
        assert!(critical.can_preempt(Priority::Low));

        // One level apart, or equal, is not enough to preempt.
        assert!(!Priority::High.can_preempt(Priority::Normal));
        assert!(!Priority::Normal.can_preempt(Priority::Normal));
    }

    /// Quality multipliers grow with priority.
    #[test]
    fn test_quality_multipliers() {
        let background = Priority::Background.quality_multiplier();
        let normal = Priority::Normal.quality_multiplier();
        let critical = Priority::Critical.quality_multiplier();

        assert!(critical > normal);
        assert!(normal > background);
    }

    /// LLM inference has a higher minimum quality than video generation.
    #[test]
    fn test_workload_min_quality() {
        let llm_floor = WorkloadType::LlmInference.min_quality();
        let video_floor = WorkloadType::VideoGeneration.min_quality();
        assert!(llm_floor > video_floor);
    }
}