pub struct LlamaContext<'a> {
    pub model: &'a LlamaModel,
    /* private fields */
}
Expand description

Safe wrapper around llama_context.

Fields§

§model: &'a LlamaModel

A reference to the context's model.

Implementations§

source§

impl LlamaContext<'_>

source

pub fn copy_cache(&mut self, src: i32, dest: i32, size: i32)

Copy the cache from one sequence to another.

Parameters
  • src - The sequence id to copy the cache from.
  • dest - The sequence id to copy the cache to.
  • size - The size of the cache to copy.
source

pub fn clear_kv_cache_seq(&mut self, src: i32, p0: Option<u16>, p1: Option<u16>)

Clear the kv cache for the given sequence.

Parameters
  • src - The sequence id to clear the cache for.
  • p0 - The start position of the cache to clear. If None, the entire cache is cleared up to [p1].
  • p1 - The end position of the cache to clear. If None, the entire cache is cleared from [p0].
source§

impl LlamaContext<'_>

source

pub fn sample(&mut self, sampler: Sampler<'_>) -> LlamaToken

Sample a token.

Panics
  • sampler contains no tokens
source

pub fn grammar_accept_token( &mut self, grammar: &mut LlamaGrammar, token: LlamaToken )

Accept a token into the grammar.

source

pub fn sample_grammar( &mut self, llama_token_data_array: &mut LlamaTokenDataArray, llama_grammar: &LlamaGrammar )

Perform grammar sampling.

source

pub fn sample_temp( &self, token_data: &mut LlamaTokenDataArray, temperature: f32 )

Modify [token_data] in place using temperature sampling.

Panics
  • [temperature] is not between 0.0 and 1.0
source

pub fn sample_token_greedy(&self, token_data: LlamaTokenDataArray) -> LlamaToken

Sample a token greedily.

Panics
  • [token_data] is empty
source

pub fn sample_token_softmax(&self, token_data: &mut LlamaTokenDataArray)

Sorts candidate tokens by their logits in descending order and calculates probabilities based on the logits.

source§

impl<'model> LlamaContext<'model>

source

pub fn n_ctx(&self) -> u32

Gets the size of the context.

source

pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError>

Decodes the batch.

Errors
  • DecodeError if the decoding failed.
Panics
  • the returned [c_int] from llama-cpp does not fit into an i32 (this should never happen on most systems)
source

pub fn candidates_ith( &self, i: i32 ) -> impl Iterator<Item = LlamaTokenData> + '_

Get an iterator over the candidate token data for the ith token in the context.

Panics
  • logit i is not initialized.
source

pub fn get_logits_ith(&self, i: i32) -> &[f32]

Get the logits for the ith token in the context.

Panics
  • i is greater than n_ctx
  • n_vocab does not fit into a usize
  • logit i is not initialized.
source

pub fn reset_timings(&mut self)

Reset the timings for the context.

source

pub fn timings(&mut self) -> LlamaTimings

Returns the timings for the context.

Trait Implementations§

source§

impl Debug for LlamaContext<'_>

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Drop for LlamaContext<'_>

source§

fn drop(&mut self)

Executes the destructor for this type. Read more

Auto Trait Implementations§

§

impl<'a> RefUnwindSafe for LlamaContext<'a>

§

impl<'a> !Send for LlamaContext<'a>

§

impl<'a> !Sync for LlamaContext<'a>

§

impl<'a> Unpin for LlamaContext<'a>

§

impl<'a> UnwindSafe for LlamaContext<'a>

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more