Skip to main content

BatchModelExecutor

Trait BatchModelExecutor 

Source
/// Batch model executor for processing multiple requests efficiently.
///
/// Extends [`ModelExecutor`] with batched prefill and decode entry points,
/// plus two synchronous queries about batch sizes.
// NOTE(review): the `'life0` / `'life1` / `'async_trait` lifetimes and the
// `Pin<Box<dyn Future<...> + Send>>` return types look like the expansion of
// `#[async_trait]` over `async fn` declarations — confirm against the
// original source before hand-editing these signatures.
pub trait BatchModelExecutor: ModelExecutor {
    // Required methods
    /// Execute batch prefill for multiple sequences.
    ///
    /// Takes a borrowed slice of [`PrefillInput`] and returns a boxed, pinned,
    /// `Send` future that resolves to `Result<Vec<PrefillOutput>>`. The future
    /// borrows both `self` and `inputs` (see the `'life0` / `'life1` bounds),
    /// so both must outlive it.
    // NOTE(review): presumably yields one `PrefillOutput` per input, in input
    // order — not visible from this signature; confirm with implementors.
    fn batch_prefill<'life0, 'life1, 'async_trait>(
        &'life0 self,
        inputs: &'life1 [PrefillInput],
    ) -> Pin<Box<dyn Future<Output = Result<Vec<PrefillOutput>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait;
    /// Execute batch decode for multiple sequences.
    ///
    /// Takes a borrowed slice of [`DecodeInput`] and returns a boxed, pinned,
    /// `Send` future that resolves to `Result<Vec<DecodeOutput>>`. The future
    /// borrows both `self` and `inputs` (see the `'life0` / `'life1` bounds),
    /// so both must outlive it.
    // NOTE(review): presumably yields one `DecodeOutput` per input, in input
    // order — not visible from this signature; confirm with implementors.
    fn batch_decode<'life0, 'life1, 'async_trait>(
        &'life0 self,
        inputs: &'life1 [DecodeInput],
    ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait;
    /// Get optimal batch size for current conditions.
    // NOTE(review): what "current conditions" covers (load, memory, ...) is
    // implementation-defined as far as this declaration shows — confirm.
    fn optimal_batch_size(&self) -> usize;
    /// Check if batch size is supported.
    ///
    /// Returns `true` when `batch_size` is supported by this executor.
    fn supports_batch_size(&self, batch_size: usize) -> bool;
}
Expand description

A batch model executor that processes multiple requests efficiently.

Required Methods§

Source

fn batch_prefill<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [PrefillInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<PrefillOutput>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Executes a batch prefill for multiple sequences.

Source

fn batch_decode<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Executes a batch decode for multiple sequences.

Source

fn optimal_batch_size(&self) -> usize

Returns the optimal batch size for the current conditions.

Source

fn supports_batch_size(&self, batch_size: usize) -> bool

Checks whether the given batch size is supported.

Implementors§