pub struct ConfigurableModelExecutor { /* private fields */ }
Model executor that produces a configurable sequence of tokens.
Implementations§
Source§impl ConfigurableModelExecutor
impl ConfigurableModelExecutor
Source§pub fn with_token_sequence(vocab_size: usize, tokens: Vec<u32>) -> Self
pub fn with_token_sequence(vocab_size: usize, tokens: Vec<u32>) -> Self
Creates an executor that cycles through the given token sequence.
Source§pub fn with_eos_after(vocab_size: usize, n: usize, eos_token: u32) -> Self
pub fn with_eos_after(vocab_size: usize, n: usize, eos_token: u32) -> Self
Creates an executor that emits EOS after `n` decode steps.
Trait Implementations§
Source§impl ModelExecutor for ConfigurableModelExecutor
impl ModelExecutor for ConfigurableModelExecutor
Source§fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Execute prefill phase (process initial prompt)
Source§fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Execute decode phase (generate next token)
Source§fn capabilities(&self) -> ExecutorCapabilities
fn capabilities(&self) -> ExecutorCapabilities
Get executor capabilities
Source§fn status(&self) -> ExecutorStatus
fn status(&self) -> ExecutorStatus
Get current executor status
Source§fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Batch decode: process multiple sequences in one forward pass. Read more
Source§fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Optional: full forward pass (for non-autoregressive use cases)
Source§fn truncate_kv<'life0, 'life1, 'async_trait>(
&'life0 self,
_kv_cache: &'life1 Arc<dyn KvCacheHandle>,
_new_len: usize,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn truncate_kv<'life0, 'life1, 'async_trait>(
&'life0 self,
_kv_cache: &'life1 Arc<dyn KvCacheHandle>,
_new_len: usize,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Roll the KV cache for this executor’s sequence back to `new_len`.
Used by speculative decoding on partial rejection so the next
iteration sees a KV prefix that matches the accepted token stream.
Default: `Ok(())` — executors that don’t cache per-sequence state
(stub, mock) are inherently tolerant; real LLM executors override.
Source§fn forward_verify<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward_verify<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Multi-position decode-verify: one forward over N+1 tokens,
producing one logits row per position. Used by speculative
decoding’s target path so we don’t pay N+1 sequential forwards. Read more
Source§fn warmup<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
Self: 'async_trait,
fn warmup<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
Self: 'async_trait,
Warm up executor (load model, allocate memory, etc.)
Source§fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
Self: 'async_trait,
fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
Self: 'async_trait,
Shutdown executor gracefully
Source§fn release_cache(&self, _cache_id: &str)
fn release_cache(&self, _cache_id: &str)
Release KV cache and state for a completed sequence. Read more
Auto Trait Implementations§
impl !Freeze for ConfigurableModelExecutor
impl RefUnwindSafe for ConfigurableModelExecutor
impl Send for ConfigurableModelExecutor
impl Sync for ConfigurableModelExecutor
impl Unpin for ConfigurableModelExecutor
impl UnsafeUnpin for ConfigurableModelExecutor
impl UnwindSafe for ConfigurableModelExecutor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more