cbtop/predictive_scheduler/
types.rs1use std::collections::HashMap;
4use std::time::{Duration, Instant};
5
/// Tunable parameters for the predictive scheduler.
///
/// None of these fields is read in this file, so the per-field notes only
/// state the value installed by [`Default`] and hedge the intended meaning.
#[derive(Debug, Clone)]
pub struct PredictiveSchedulerConfig {
    /// SLO compliance target; defaults to `0.99`.
    /// Presumably a fraction in `[0, 1]` — confirm against the scheduler.
    pub target_slo_compliance: f64,
    /// Ceiling on the cost of a single operation; defaults to `1.0`.
    pub max_cost_per_op: f64,
    /// Whether spot instances may be considered; defaults to `true`.
    pub enable_spot_instances: bool,
    /// Safety margin ahead of a preemption deadline; defaults to 300 seconds.
    /// Presumably the `buffer` passed to `HostProfile::is_safe_for_scheduling`
    /// — confirm at the call site.
    pub preemption_buffer: Duration,
    /// Decay factor for load tracking; defaults to `0.9`.
    pub load_decay_factor: f64,
    /// Minimum capacity threshold; defaults to `0.8`.
    pub min_capacity_threshold: f64,
    /// Penalty weight applied to SLO violations; defaults to `10.0`.
    pub slo_violation_penalty: f64,
    /// Size of the history window; defaults to `100`.
    /// Presumably a sample count — confirm against the scheduler.
    pub history_window: usize,
}

impl Default for PredictiveSchedulerConfig {
    /// Builds the stock configuration documented on each field.
    fn default() -> Self {
        Self {
            target_slo_compliance: 0.99,
            max_cost_per_op: 1.0,
            enable_spot_instances: true,
            preemption_buffer: Duration::from_secs(300),
            load_decay_factor: 0.9,
            min_capacity_threshold: 0.8,
            slo_violation_penalty: 10.0,
            history_window: 100,
        }
    }
}
41
/// Kind of compute instance a host runs on.
///
/// Each variant carries a fixed cost factor ([`InstanceType::cost_multiplier`])
/// and a fixed reliability figure ([`InstanceType::reliability`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InstanceType {
    /// Cost multiplier `1.0`, reliability `0.9999`.
    OnDemand,
    /// Cost multiplier `0.3`, reliability `0.85`.
    Spot,
    /// Cost multiplier `0.6`, reliability `0.9999`.
    Reserved,
    /// Cost multiplier `0.2`, reliability `0.90`.
    Preemptible,
}

impl InstanceType {
    /// Returns the factor by which this instance type scales a base cost.
    ///
    /// `OnDemand` is the `1.0` baseline; the other variants are cheaper.
    pub fn cost_multiplier(&self) -> f64 {
        match self {
            Self::OnDemand => 1.0,
            Self::Spot => 0.3,
            Self::Reserved => 0.6,
            Self::Preemptible => 0.2,
        }
    }

    /// Returns the reliability figure associated with this instance type.
    ///
    /// Presumably a probability in `[0, 1]` — confirm against the scheduler
    /// that consumes it.
    pub fn reliability(&self) -> f64 {
        match self {
            Self::OnDemand | Self::Reserved => 0.9999,
            Self::Spot => 0.85,
            Self::Preemptible => 0.90,
        }
    }
}
76
77#[derive(Debug, Clone)]
79pub struct HostProfile {
80 pub host_id: String,
82 pub instance_type: InstanceType,
84 pub compute_capacity: f64,
86 pub memory_capacity: u64,
88 pub current_load: f64,
90 pub hourly_cost: f64,
92 pub network_latency_ms: f64,
94 pub historical_slo_compliance: f64,
96 pub preemption_deadline: Option<Instant>,
98 pub performance_cv: f64,
100}
101
102impl HostProfile {
103 pub fn new(host_id: impl Into<String>, instance_type: InstanceType) -> Self {
105 Self {
106 host_id: host_id.into(),
107 instance_type,
108 compute_capacity: 1000.0,
109 memory_capacity: 8 * 1024 * 1024 * 1024, current_load: 0.0,
111 hourly_cost: 1.0,
112 network_latency_ms: 1.0,
113 historical_slo_compliance: 0.99,
114 preemption_deadline: None,
115 performance_cv: 0.1,
116 }
117 }
118
119 pub fn cost_per_op(&self) -> f64 {
121 let base_cost = self.hourly_cost * self.instance_type.cost_multiplier();
122 let effective_capacity = self.compute_capacity * (1.0 - self.current_load);
124 if effective_capacity > 0.0 {
125 base_cost / (effective_capacity * 3600.0)
126 } else {
127 f64::MAX
128 }
129 }
130
131 pub fn available_capacity(&self) -> f64 {
133 (1.0 - self.current_load).max(0.0)
134 }
135
136 pub fn time_until_preemption(&self) -> Option<Duration> {
138 self.preemption_deadline.map(|deadline| {
139 let now = Instant::now();
140 if deadline > now {
141 deadline - now
142 } else {
143 Duration::ZERO
144 }
145 })
146 }
147
148 pub fn is_safe_for_scheduling(&self, buffer: Duration) -> bool {
150 match self.time_until_preemption() {
151 Some(time_left) => time_left > buffer,
152 None => true, }
154 }
155}
156
157#[derive(Debug, Clone)]
159pub struct WorkloadSpec {
160 pub workload_id: String,
162 pub operation_count: u64,
164 pub memory_required: u64,
166 pub slo_deadline: Duration,
168 pub priority: u32,
170 pub preemptible: bool,
172 pub compute_intensity: f64,
174}
175
176impl WorkloadSpec {
177 pub fn new(workload_id: impl Into<String>, operation_count: u64) -> Self {
179 Self {
180 workload_id: workload_id.into(),
181 operation_count,
182 memory_required: 1024 * 1024, slo_deadline: Duration::from_millis(100),
184 priority: 1,
185 preemptible: true,
186 compute_intensity: 100.0,
187 }
188 }
189
190 pub fn estimated_execution_time(&self, host: &HostProfile) -> Duration {
192 let ops_per_sec = host.compute_capacity * host.available_capacity();
193 if ops_per_sec > 0.0 {
194 let seconds = self.operation_count as f64 / ops_per_sec;
195 Duration::from_secs_f64(seconds)
196 } else {
197 Duration::MAX
198 }
199 }
200}
201
/// Outcome of a scheduling decision: the chosen host plus the predictions
/// and score that accompany the choice.
///
/// Nothing in this file constructs or reads this type; how each value is
/// derived is up to the scheduler that fills it in.
#[derive(Debug, Clone)]
pub struct SchedulingDecision {
    /// Identifier of the selected host.
    pub host_id: String,
    /// Predicted execution time on that host.
    pub predicted_time: Duration,
    /// Predicted cost of running the workload there.
    pub predicted_cost: f64,
    /// Predicted probability of meeting the SLO; presumably in `[0, 1]`.
    pub slo_compliance_prob: f64,
    /// Score assigned to this decision; whether higher or lower is better is
    /// not visible in this file.
    pub score: f64,
    /// Human-readable explanation of the decision.
    pub reason: String,
}
218
/// Running totals describing how the scheduler has performed.
///
/// `Default` yields all-zero counters and an empty utilization map.
#[derive(Debug, Clone, Default)]
pub struct SchedulerMetrics {
    /// Number of scheduling decisions made; the denominator of both derived
    /// rates below.
    pub total_decisions: u64,
    /// Number of decisions counted as SLO violations.
    pub slo_violations: u64,
    /// Accumulated cost across all decisions.
    pub total_cost: f64,
    /// Average scheduling latency, in microseconds per the field name.
    pub avg_scheduling_latency_us: f64,
    /// Utilization keyed by a string; presumably the host id — confirm
    /// against the code that populates it.
    pub host_utilization: HashMap<String, f64>,
    /// Savings attributed to spot instances.
    pub spot_savings: f64,
}

impl SchedulerMetrics {
    /// Returns the fraction of decisions that did not violate the SLO.
    ///
    /// With no decisions recorded the rate is reported as `1.0`.
    pub fn slo_compliance_rate(&self) -> f64 {
        match self.total_decisions {
            0 => 1.0,
            total => 1.0 - (self.slo_violations as f64 / total as f64),
        }
    }

    /// Returns the mean cost per decision, or `0.0` when no decisions have
    /// been recorded.
    pub fn avg_cost_per_decision(&self) -> f64 {
        match self.total_decisions {
            0 => 0.0,
            total => self.total_cost / total as f64,
        }
    }
}