pub enum ForwardEngine {
Llama(LlamaForward),
Phi(PhiForward),
}
Expand description
Architecture-specific inference engine with KV-cache support.
Variants§
Llama(LlamaForward)
Phi(PhiForward)
Implementations§
Source§impl ForwardEngine
impl ForwardEngine
pub fn from_pretrained( info: ModelInfo, weight_paths: &[PathBuf], ) -> Result<Self>
pub fn from_weight_paths( info: ModelInfo, weight_paths: &[PathBuf], ) -> Result<Self>
pub fn from_weight_paths_with_backend( info: ModelInfo, weight_paths: &[PathBuf], backend: LlmBackendKind, ) -> Result<Self>
pub fn from_gguf(info: ModelInfo, path: &Path) -> Result<Self>
pub fn from_gguf_with_backend( info: ModelInfo, path: &Path, backend: LlmBackendKind, ) -> Result<Self>
Source§pub fn from_gguf_mmap_with_backend(
info: ModelInfo,
path: &Path,
backend: LlmBackendKind,
) -> Result<Self>
pub fn from_gguf_mmap_with_backend( info: ModelInfo, path: &Path, backend: LlmBackendKind, ) -> Result<Self>
Load via memory-mapping — Q4_0/Q8_0 tensors are zero-copy from disk. The OS pages in weight blocks on demand; only active layers are resident.
pub fn reset_cache(&mut self)
pub fn forward_logits( &mut self, input_ids: &[u32], use_cache: bool, ) -> Result<Vec<f32>>
Source§pub fn forward_all_logits(&mut self, input_ids: &[u32]) -> Result<Vec<Vec<f32>>>
pub fn forward_all_logits(&mut self, input_ids: &[u32]) -> Result<Vec<Vec<f32>>>
Run the model on input_ids WITHOUT updating the KV cache and return
logits for ALL positions. Used by speculative decoding to verify K draft
tokens in a single target-model forward pass.
pub fn embed(&mut self, input_ids: &[u32]) -> Result<Vec<f32>>
Auto Trait Implementations§
impl Freeze for ForwardEngine
impl !RefUnwindSafe for ForwardEngine
impl Send for ForwardEngine
impl Sync for ForwardEngine
impl Unpin for ForwardEngine
impl UnsafeUnpin for ForwardEngine
impl !UnwindSafe for ForwardEngine
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more