pub struct BertModelExecutor { /* private fields */ }
Expand description
BERT Executor for embedding tasks
Implementations§
Source§impl BertModelExecutor
impl BertModelExecutor
Sourcepub fn new(
model: BertModelWrapper,
model_info: ModelInfo,
device: CandleDevice,
) -> Self
pub fn new( model: BertModelWrapper, model_info: ModelInfo, device: CandleDevice, ) -> Self
Create a new BERT executor
Sourcepub async fn from_path(
model_path: &str,
model_def: &ModelDefinition,
device: CandleDevice,
) -> Result<Self>
pub async fn from_path( model_path: &str, model_def: &ModelDefinition, device: CandleDevice, ) -> Result<Self>
Load BERT executor from path
Sourcepub fn get_embeddings(&self, input_ids: &[u32]) -> Result<Tensor>
pub fn get_embeddings(&self, input_ids: &[u32]) -> Result<Tensor>
Get embeddings for input tokens
Sourcepub fn model(&self) -> &BertModelWrapper
pub fn model(&self) -> &BertModelWrapper
Get model reference
Trait Implementations§
Source§impl ModelExecutor for BertModelExecutor
impl ModelExecutor for BertModelExecutor
Source§fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
For BERT, prefill returns the embeddings (not logits)
Source§fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
BERT doesn’t support decode (it’s an encoder model)
Source§fn capabilities(&self) -> ExecutorCapabilities
fn capabilities(&self) -> ExecutorCapabilities
Get executor capabilities
Source§fn status(&self) -> ExecutorStatus
fn status(&self) -> ExecutorStatus
Get current executor status
Source§fn batch_prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [PrefillInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<PrefillOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn batch_prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [PrefillInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<PrefillOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Batch prefill: process multiple prompts’ prefill in ONE forward pass. Read more
Source§fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Batch decode: process multiple sequences in one forward pass. Read more
Source§fn unified_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_batch: &'life1 UnifiedBatch,
) -> Pin<Box<dyn Future<Output = Result<Vec<Option<Vec<f32>>>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn unified_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_batch: &'life1 UnifiedBatch,
) -> Pin<Box<dyn Future<Output = Result<Vec<Option<Vec<f32>>>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Unified mixed-batch forward: process a `UnifiedBatch` containing
any combination of prefill chunks (one or more `q_tokens` per item,
possibly continuing from `pos_offset > 0`) and decode steps
(`q_tokens.len() == 1`, `is_final_chunk = true`) in a single model
forward pass. Read more
Source§fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Optional: full forward pass (for non-autoregressive use cases)
Source§fn truncate_kv<'life0, 'life1, 'async_trait>(
&'life0 self,
_kv_cache: &'life1 Arc<dyn KvCacheHandle>,
_new_len: usize,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn truncate_kv<'life0, 'life1, 'async_trait>(
&'life0 self,
_kv_cache: &'life1 Arc<dyn KvCacheHandle>,
_new_len: usize,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Roll the KV cache for this executor’s sequence back to `new_len`.
Used by speculative decoding on partial rejection so the next
iteration sees a KV prefix that matches the accepted token stream.
Default: `Ok(())` — executors that don’t cache per-sequence state
(stub, mock) are inherently tolerant; real LLM executors override.
Source§fn forward_verify<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward_verify<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Multi-position decode-verify: one forward over N+1 tokens,
producing one logits row per position. Used by speculative
decoding’s target path so we don’t pay N+1 sequential forwards. Read more
Source§fn warmup<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
fn warmup<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
Warm up executor (load model, allocate memory, etc.)
Source§fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
Shutdown executor gracefully
Source§fn release_cache(&self, _cache_id: &str)
fn release_cache(&self, _cache_id: &str)
Release KV cache and state for a completed sequence. Read more
Auto Trait Implementations§
impl !Freeze for BertModelExecutor
impl !RefUnwindSafe for BertModelExecutor
impl Send for BertModelExecutor
impl Sync for BertModelExecutor
impl Unpin for BertModelExecutor
impl UnsafeUnpin for BertModelExecutor
impl !UnwindSafe for BertModelExecutor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>`
if `into_left` is `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>`
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>`
if `into_left(&self)` returns `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>`
otherwise. Read more