mod fifo;
pub use fifo::FifoPolicy;
use async_trait::async_trait;
use std::collections::HashMap;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Notify;
/// Snapshot of scheduling state handed to [`SwitchPolicy::on_pending_request`].
///
/// NOTE(review): the code that fills this struct is not visible from this
/// file, so the per-field descriptions below are inferred from the field
/// names and types — confirm against the caller.
#[derive(Clone, Debug)]
pub struct PolicyContext {
    /// Model the pending request is waiting for (presumably a model name/id).
    pub target_model: String,
    /// Model that is currently active, or `None` when there is none.
    pub active_model: Option<String>,
    /// Number of requests queued for `target_model` (presumed).
    pub target_queue_depth: usize,
    /// How long the oldest queued request has been waiting (presumed).
    pub oldest_waiting: Duration,
    /// Number of requests in flight on the active model (presumed).
    pub active_in_flight: usize,
    /// How long the active model has been active (presumed).
    pub active_duration: Duration,
    /// Estimated cost of switching to `target_model`, when an estimate exists.
    pub estimated_switch_cost: Option<Duration>,
}
/// Snapshot of scheduling state handed to [`SwitchPolicy::schedule_tick`].
///
/// NOTE(review): the code that fills this struct is not visible from this
/// file, so the per-field descriptions below are inferred from the field
/// names and types — confirm against the caller.
#[derive(Clone, Debug)]
pub struct ScheduleContext {
    /// Model that is currently active, or `None` when there is none.
    pub active_model: Option<String>,
    /// How long the active model has been active (presumed).
    pub active_duration: Duration,
    /// Queue depth per model; keys are presumably model names.
    pub queue_depths: HashMap<String, usize>,
    /// Number of requests in flight on the active model (presumed).
    pub active_in_flight: usize,
    /// Switch cost per model; keys are presumably model names.
    pub switch_costs: HashMap<String, Duration>,
}
/// Context handed (mutably) to [`SwitchPolicy::prepare_switch`].
///
/// Besides the two public model fields it carries a private notifier and a
/// private counter callback, which back the `wait_for_in_flight` and
/// `in_flight_count` methods.
pub struct SwitchContext {
    /// Model being switched away from, or `None` when there is none.
    pub from_model: Option<String>,
    /// Model being switched to.
    pub to_model: String,
    /// Notifier awaited by `wait_for_in_flight` between counter checks.
    /// Presumably signalled by whoever completes in-flight requests — that
    /// code is not visible here.
    in_flight_drained: Arc<Notify>,
    /// Callback returning the current in-flight count.
    get_in_flight: Box<dyn Fn() -> usize + Send + Sync>,
}
impl SwitchContext {
    /// Builds a switch context from its four parts.
    ///
    /// * `from_model` / `to_model` — stored in the public fields of the same name.
    /// * `in_flight_drained` — notifier awaited by [`Self::wait_for_in_flight`].
    /// * `get_in_flight` — callback returning the current in-flight count.
    pub fn new(
        from_model: Option<String>,
        to_model: String,
        in_flight_drained: Arc<Notify>,
        get_in_flight: Box<dyn Fn() -> usize + Send + Sync>,
    ) -> Self {
        Self {
            from_model,
            to_model,
            in_flight_drained,
            get_in_flight,
        }
    }

    /// Resolves once the in-flight callback reports zero.
    ///
    /// Returns immediately if the count is already zero; otherwise waits on
    /// the notifier and re-checks the count after every wakeup.
    pub async fn wait_for_in_flight(&self) {
        loop {
            // Create the `Notified` future *before* sampling the counter.
            // A `Notified` receives `notify_waiters()` wakeups from the
            // moment it is created, so a drain signal that fires between the
            // check and the `.await` is no longer lost. (`notify_one()`
            // stores a permit and was already safe; this ordering keeps it so.)
            let drained = self.in_flight_drained.notified();
            if (self.get_in_flight)() == 0 {
                return;
            }
            drained.await;
        }
    }

    /// Returns the current in-flight count as reported by the callback.
    pub fn in_flight_count(&self) -> usize {
        (self.get_in_flight)()
    }
}
/// Outcome returned by [`SwitchPolicy::on_pending_request`].
///
/// NOTE(review): how each variant is acted upon is decided by code outside
/// this file; the descriptions below are inferred from the variant names.
pub enum PolicyDecision {
    /// Switch to the target model right away (presumed).
    SwitchNow,
    /// Carries a boxed, pinned, `Send + 'static` future with unit output;
    /// presumably the decision is revisited once that future resolves.
    Defer(Pin<Box<dyn Future<Output = ()> + Send + 'static>>),
    /// Do not switch for this request (presumed).
    Skip,
}
/// Pluggable policy deciding when and how the active model is switched.
///
/// Implementations must be `Send + Sync`. The code that drives these hooks
/// is not visible in this file, so statements about *when* a hook runs are
/// marked as presumed.
#[async_trait]
pub trait SwitchPolicy: Send + Sync {
    /// Produces a [`PolicyDecision`] for the situation described by `ctx`.
    /// Presumably called while a request is pending for `ctx.target_model`.
    async fn on_pending_request(&self, ctx: &PolicyContext) -> PolicyDecision;

    /// Hook given mutable access to the [`SwitchContext`]; presumably run
    /// before a switch is carried out (e.g. to await
    /// `ctx.wait_for_in_flight()`).
    async fn prepare_switch(&self, ctx: &mut SwitchContext);

    /// Hook receiving the source model, destination model and a duration
    /// (presumably how long the switch took). The default does nothing.
    fn on_switch_complete(&self, _from: &str, _to: &str, _duration: Duration) {}

    /// Request timeout chosen by the policy, if any.
    /// NOTE(review): the meaning of `None` is defined by the caller — confirm.
    fn request_timeout(&self) -> Option<Duration>;

    /// Minimum active duration chosen by the policy (presumably how long a
    /// model must stay active before it may be switched away from).
    fn min_active_duration(&self) -> Duration;

    /// Interval for the scheduler, if any. The default returns `None`;
    /// presumably that means [`Self::schedule_tick`] is never driven.
    fn scheduler_interval(&self) -> Option<Duration> {
        None
    }

    /// Scheduler hook that may return a model name given `_ctx` (presumably
    /// the model to switch to). The default returns `None`.
    fn schedule_tick(&self, _ctx: &ScheduleContext) -> Option<String> {
        None
    }
}