Struct TimeSlicePolicy

Source

pub struct TimeSlicePolicy { /* private fields */ }

Expand description

Drain-first scheduling policy with a proactive background scheduler.

This policy minimizes GPU time wasted on model switches by following two principles:

1. Never preempt a serving model. When a request arrives for a non-active model, the policy defers to the background scheduler rather than switching reactively. The only exception is the staleness bound, which forces a switch if any request has waited longer than `max_wait`.
2. Switch when idle. The background scheduler periodically checks all models’ queue depths. When the active model has completely drained its queue (no pending requests, no in-flight requests), the scheduler switches to the model with the most waiting requests.

This is equivalent to “serve everything from the active model’s queue, then switch to whoever has the most demand.” The scheduler’s global visibility into all queue depths prevents the pathological back-and-forth switching that reactive policies cause under interleaved or dominant workloads.

In simulation across 12 workload profiles at switch costs from 2s to 20s, this policy achieves 61-94% GPU serving time vs CostAware’s 40-81% and FIFO’s 33-79%, while also delivering 2-6x lower maximum wait times.

TimeSlicePolicy

Struct TimeSlicePolicy Copy item path

Implementations§

impl TimeSlicePolicy

pub fn new( eviction: EvictionPolicy, request_timeout: Duration, min_active_duration: Duration, max_wait: Duration, _min_quantum: Duration, tick_interval: Duration, _model_names: Vec<String>, ) -> Self

Trait Implementations§

impl SwitchPolicy for TimeSlicePolicy

fn on_pending_request<'life0, 'life1, 'async_trait>( &'life0 self, ctx: &'life1 PolicyContext, ) -> Pin<Box<dyn Future<Output = PolicyDecision> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn prepare_switch<'life0, 'life1, 'async_trait>( &'life0 self, ctx: &'life1 mut SwitchContext, ) -> Pin<Box<dyn Future<Output = ()> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn eviction_policy(&self) -> EvictionPolicy

fn request_timeout(&self) -> Duration

fn min_active_duration(&self) -> Duration

fn scheduler_interval(&self) -> Option<Duration>

fn schedule_tick(&self, ctx: &ScheduleContext) -> Option<String>

fn on_switch_complete(&self, _from: &str, _to: &str, _duration: Duration)

Auto Trait Implementations§

impl Freeze for TimeSlicePolicy

impl RefUnwindSafe for TimeSlicePolicy

impl Send for TimeSlicePolicy

impl Sync for TimeSlicePolicy

impl Unpin for TimeSlicePolicy

impl UnsafeUnpin for TimeSlicePolicy

impl UnwindSafe for TimeSlicePolicy

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct TimeSlicePolicy

fn on_pending_request<'life0, 'life1, 'async_trait>( &'life0 self, ctx: &'life1 PolicyContext, ) -> Pin<Box<dyn Future<Output = PolicyDecision> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn prepare_switch<'life0, 'life1, 'async_trait>( &'life0 self, ctx: &'life1 mut SwitchContext, ) -> Pin<Box<dyn Future<Output = ()> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,