pub struct Llama32Runner { /* private fields */ }
Implementations§
Source§impl Llama32Runner
impl Llama32Runner
pub fn builder() -> Llama32RunnerBuilder
pub fn config(&self) -> &Llama32Config
pub fn device(&self) -> Device
Source§pub fn predict_logits(&mut self, prompt_ids: &[u32]) -> Result<Vec<f32>, Error>
pub fn predict_logits(&mut self, prompt_ids: &[u32]) -> Result<Vec<f32>, Error>
Single prefill forward; returns last-position logits [vocab].
pub fn generate_packed( &mut self, prompt_ids: &[u32], n_new: usize, on_token: impl FnMut(u32), ) -> Result<Vec<u32>, Error>
pub fn generate( &mut self, prompt_ids: &[u32], n_new: usize, on_token: impl FnMut(u32), ) -> Result<Vec<u32>, Error>
Trait Implementations§
Source§impl LmRunner for Llama32Runner
impl LmRunner for Llama32Runner
Source§fn vocab_size(&self) -> usize
fn vocab_size(&self) -> usize
LM head vocabulary size.
Source§fn predict_logits(&mut self, prompt_ids: &[u32]) -> Result<Vec<f32>, Error>
fn predict_logits(&mut self, prompt_ids: &[u32]) -> Result<Vec<f32>, Error>
Run prefill on `prompt_ids` and return last-token logits.
Source§fn generate(
&mut self,
prompt_ids: &[u32],
n_new: usize,
on_token: &mut dyn FnMut(u32) -> bool,
) -> Result<Vec<u32>, Error>
fn generate( &mut self, prompt_ids: &[u32], n_new: usize, on_token: &mut dyn FnMut(u32) -> bool, ) -> Result<Vec<u32>, Error>
Generate up to `n_new` tokens after `prompt_ids` using greedy
(argmax) sampling. The default impl re-prefills on the full
context each step — per-family runners should override with
their cached decode fast path. Read more
Source§fn supports_multimodal(&self) -> bool
fn supports_multimodal(&self) -> bool
Whether this runner supports multimodal (image+text) generation.
Source§fn generate_multimodal(
&mut self,
_prompt: &str,
_rgb: &[u8],
_img_w: usize,
_img_h: usize,
_tokenizer: Option<&Path>,
_n_new: usize,
_on_token: &mut dyn FnMut(u32) -> bool,
) -> Result<Vec<u32>, Error>
fn generate_multimodal( &mut self, _prompt: &str, _rgb: &[u8], _img_w: usize, _img_h: usize, _tokenizer: Option<&Path>, _n_new: usize, _on_token: &mut dyn FnMut(u32) -> bool, ) -> Result<Vec<u32>, Error>
Multimodal generation: prefill with text where image markers are
spliced with vision embeddings derived from `rgb`.
Auto Trait Implementations§
impl !RefUnwindSafe for Llama32Runner
impl !Sync for Llama32Runner
impl !UnwindSafe for Llama32Runner
impl Freeze for Llama32Runner
impl Send for Llama32Runner
impl Unpin for Llama32Runner
impl UnsafeUnpin for Llama32Runner
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more