Struct llm_base::InferenceSession

source ·

pub struct InferenceSession { /* private fields */ }

Expand description

An inference session represents the state of the text generation. This holds the full context window, as well as several additional parameters used during sampling.

Safety

This implements Send as it can be sent to another thread. However, it does not implement Sync - it cannot be used from multiple threads at the same time.

Consider spawning multiple inference sessions for the same model if you need to use it from multiple threads.

Struct llm_base::InferenceSession

Implementations§

impl InferenceSession

pub fn feed_prompt<E: Error + 'static>( &mut self, model: &dyn Model, params: &InferenceParameters, prompt: &str, output_request: &mut EvaluateOutputRequest, callback: impl FnMut(&[u8]) -> Result<(), E> ) -> Result<(), InferenceError>

pub fn infer_next_token<'v>( &mut self, model: &'v dyn Model, params: &InferenceParameters, output_request: &mut EvaluateOutputRequest, rng: &mut impl Rng ) -> Result<&'v [u8], InferenceError>

pub fn infer<E: Error + 'static>( &mut self, model: &dyn Model, prompt: &str, output_request: &mut EvaluateOutputRequest, rng: &mut impl Rng, callback: impl FnMut(&str) -> Result<(), E> ) -> Result<InferenceStats, InferenceError>

pub fn sample_top_p_top_k( &self, params: &InferenceParameters, rng: &mut impl Rng ) -> TokenId

pub unsafe fn get_snapshot(&mut self) -> InferenceSnapshotRef<'_>

pub fn from_snapshot( snapshot: InferenceSnapshot, model: &dyn Model ) -> Result<Self, SnapshotError>

impl InferenceSession

pub fn new( params: InferenceSessionParameters, n_ctx: usize, n_layer: usize, n_embd: usize, n_vocab: usize ) -> InferenceSession

Trait Implementations§

impl Clone for InferenceSession

fn clone(&self) -> Self

fn clone_from(&mut self, source: &Self)

impl Send for InferenceSession

Auto Trait Implementations§

impl RefUnwindSafe for InferenceSession

impl !Sync for InferenceSession

impl Unpin for InferenceSession

impl UnwindSafe for InferenceSession

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V