pub trait SpeculativeExecutor: ModelExecutor {
// Required method
fn speculative_decode<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
input: &'life1 DecodeInput,
draft_tokens: &'life2 [TokenId],
acceptance_threshold: f32,
) -> Pin<Box<dyn Future<Output = Result<SpeculativeDecodeOutput>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait;
}
Expand description
Speculative execution support
Required Methods
Source
fn speculative_decode<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
input: &'life1 DecodeInput,
draft_tokens: &'life2 [TokenId],
acceptance_threshold: f32,
) -> Pin<Box<dyn Future<Output = Result<SpeculativeDecodeOutput>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn speculative_decode<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
input: &'life1 DecodeInput,
draft_tokens: &'life2 [TokenId],
acceptance_threshold: f32,
) -> Pin<Box<dyn Future<Output = Result<SpeculativeDecodeOutput>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
Execute speculative decoding with a draft model.