use crate::priority::Priority;
/// Per-request attributes consulted when classifying a request's scheduling
/// priority (see `determine_priority`).
#[derive(Clone, Debug)]
pub struct RequestMetadata {
    /// Caller-supplied priority level. `determine_priority` treats `0` as an
    /// explicit request for latency-critical handling.
    pub priority: u8,
    /// Optional SLA deadline. `determine_priority` compares it against a
    /// millisecond threshold, so it is presumably expressed in milliseconds
    /// (TODO confirm with callers whether it is relative or absolute).
    pub sla_deadline: Option<u64>,
    /// Batch size of the request.
    pub batch_size: usize,
    /// Size of the request input in bytes, when known.
    pub input_size_bytes: Option<usize>,
    /// Estimated compute cost of the request in FLOPs, when known.
    pub estimated_flops: Option<u64>,
}
impl Default for RequestMetadata {
fn default() -> Self {
Self {
priority: 1,
sla_deadline: None,
batch_size: 1,
input_size_bytes: None,
estimated_flops: None,
}
}
}
/// Inputs of at most this many bytes count as a light workload on their own.
const LIGHT_TASK_SIZE_BYTES: usize = 500_000;

/// Requests estimated at no more than this many FLOPs count as a light workload.
const LIGHT_TASK_FLOPS: u64 = 1_000_000_000;

/// Upper bound on `input_size_bytes * batch_size` for a request to still count
/// as a light workload.
const COMBINED_WORK_THRESHOLD: usize = 2_000_000;

/// SLA deadlines strictly below this value (milliseconds) are latency-critical.
const SLA_LATENCY_CRITICAL_MS: u64 = 250;

/// Batches of at most this many items are routed as latency-critical.
const SMALL_BATCH_SIZE: usize = 4;
/// Classifies a request as latency-critical or throughput-oriented.
///
/// A request is [`Priority::LatencyCritical`] as soon as any one of the
/// following holds, checked in this order:
///
/// 1. its `priority` field is `0`;
/// 2. it carries an SLA deadline strictly below `SLA_LATENCY_CRITICAL_MS`;
/// 3. `is_light_workload` reports it as light;
/// 4. its `batch_size` is at most `SMALL_BATCH_SIZE`.
///
/// Everything else is [`Priority::Throughput`].
pub fn determine_priority(metadata: &RequestMetadata) -> Priority {
    // `||` short-circuits, so later checks only run when earlier ones fail,
    // exactly as a chain of early returns would.
    let latency_critical = metadata.priority == 0
        || matches!(metadata.sla_deadline, Some(ms) if ms < SLA_LATENCY_CRITICAL_MS)
        || is_light_workload(metadata)
        || metadata.batch_size <= SMALL_BATCH_SIZE;

    if latency_critical {
        Priority::LatencyCritical
    } else {
        Priority::Throughput
    }
}
/// Reports whether a request is cheap enough to be treated as a light workload.
///
/// Returns `true` when any of the following holds:
///
/// * the input size is known and is at most `LIGHT_TASK_SIZE_BYTES`;
/// * the input size is known and `input_size_bytes * batch_size` is at most
///   `COMBINED_WORK_THRESHOLD`;
/// * the FLOP estimate is known and is at most `LIGHT_TASK_FLOPS`.
///
/// Returns `false` otherwise, including when neither estimate is available.
fn is_light_workload(metadata: &RequestMetadata) -> bool {
    if let Some(size) = metadata.input_size_bytes {
        if size <= LIGHT_TASK_SIZE_BYTES {
            return true;
        }
        // Saturate rather than multiply directly: a plain `*` panics on
        // overflow in checked builds and wraps in release builds, where the
        // wrapped product could land under the threshold and misclassify an
        // enormous request as light.
        let total_work = size.saturating_mul(metadata.batch_size);
        if total_work <= COMBINED_WORK_THRESHOLD {
            return true;
        }
    }

    matches!(metadata.estimated_flops, Some(flops) if flops <= LIGHT_TASK_FLOPS)
}
// Unit tests for this module are kept out of line: the `#[path]` attribute
// loads them from `request_metadata_tests.rs`, and `#[cfg(test)]` compiles the
// module only for test builds.
#[cfg(test)]
#[path = "request_metadata_tests.rs"]
mod request_metadata_tests;