pub trait BatchModelExecutor: ModelExecutor {
// Required methods
fn batch_prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [PrefillInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<PrefillOutput>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
fn optimal_batch_size(&self) -> usize;
fn supports_batch_size(&self, batch_size: usize) -> bool;
}Expand description
Batch model executor for processing multiple requests efficiently
Required Methods
fn batch_prefill<'life0, 'life1, 'async_trait>(
    &'life0 self,
    inputs: &'life1 [PrefillInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<PrefillOutput>>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
    'life1: 'async_trait

Execute batch prefill for multiple sequences.
fn batch_decode<'life0, 'life1, 'async_trait>(
    &'life0 self,
    inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
    'life1: 'async_trait

Execute batch decode for multiple sequences.
fn optimal_batch_size(&self) -> usize

Get the optimal batch size for current conditions.
fn supports_batch_size(&self, batch_size: usize) -> bool

Check whether a given batch size is supported.