pub struct BertModelExecutor { /* private fields */ }Expand description
BERT Executor for embedding tasks
Implementations§
Source§impl BertModelExecutor
impl BertModelExecutor
Sourcepub fn new(
model: BertModelWrapper,
model_info: ModelInfo,
device: CandleDevice,
) -> Self
pub fn new( model: BertModelWrapper, model_info: ModelInfo, device: CandleDevice, ) -> Self
Create a new BERT executor
Sourcepub async fn from_path(
model_path: &str,
model_def: &ModelDefinition,
device: CandleDevice,
) -> Result<Self>
pub async fn from_path( model_path: &str, model_def: &ModelDefinition, device: CandleDevice, ) -> Result<Self>
Load BERT executor from path
Sourcepub fn get_embeddings(&self, input_ids: &[u32]) -> Result<Tensor>
pub fn get_embeddings(&self, input_ids: &[u32]) -> Result<Tensor>
Get embeddings for input tokens
Sourcepub fn model(&self) -> &BertModelWrapper
pub fn model(&self) -> &BertModelWrapper
Get model reference
Trait Implementations§
Source§impl ModelExecutor for BertModelExecutor
impl ModelExecutor for BertModelExecutor
Source§fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
For BERT, prefill returns the embeddings (not logits)
Source§fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
BERT doesn’t support decode (it’s an encoder model)
Source§fn capabilities(&self) -> ExecutorCapabilities
fn capabilities(&self) -> ExecutorCapabilities
Get executor capabilities
Source§fn status(&self) -> ExecutorStatus
fn status(&self) -> ExecutorStatus
Get current executor status
Source§fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Batch decode: process multiple sequences in one forward pass. Read more
Source§fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Optional: full forward pass (for non-autoregressive use cases)
Source§fn warmup<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
fn warmup<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
Warm up executor (load model, allocate memory, etc.)
Source§fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
Self: 'async_trait,
Shutdown executor gracefully
Source§fn release_cache(&self, _cache_id: &str)
fn release_cache(&self, _cache_id: &str)
Release KV cache and state for a completed sequence. Read more
Auto Trait Implementations§
impl !Freeze for BertModelExecutor
impl !RefUnwindSafe for BertModelExecutor
impl Send for BertModelExecutor
impl Sync for BertModelExecutor
impl Unpin for BertModelExecutor
impl UnsafeUnpin for BertModelExecutor
impl !UnwindSafe for BertModelExecutor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more