Struct llama_cpp_2::context::LlamaContext
pub struct LlamaContext<'a> {
    pub model: &'a LlamaModel,
    /* private fields */
}
Safe wrapper around llama_context.
Fields§
§model: &'a LlamaModel
A reference to the context's model.
Implementations§
impl LlamaContext<'_>
pub fn copy_cache(&mut self, src: i32, dest: i32, size: i32)
Copy the cache from one sequence to another.
Parameters
- src - The sequence id to copy the cache from.
- dest - The sequence id to copy the cache to.
- size - The size of the cache to copy.
pub fn clear_kv_cache_seq(&mut self, src: i32, p0: Option<u16>, p1: Option<u16>)
Clear the kv cache for the given sequence.
Parameters
- src - The sequence id to clear the cache for.
- p0 - The start position of the cache to clear. If None, the entire cache is cleared up to [p1].
- p1 - The end position of the cache to clear. If None, the entire cache is cleared from [p0].
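The two cache methods above combine naturally for speculative work: fork a sequence's KV cache, decode on the copy, and discard it if the speculation fails. A minimal sketch, assuming an already-initialized LlamaContext; the sequence ids 0 and 1 and the size of 32 cells are illustrative values, not values mandated by the crate:

```rust
// Sketch: fork a sequence's KV cache, then discard the copy.
fn fork_and_discard(ctx: &mut LlamaContext<'_>) {
    // Duplicate the first 32 cache cells of sequence 0 into sequence 1.
    ctx.copy_cache(0, 1, 32);

    // ... decode speculatively against sequence 1 here ...

    // Drop sequence 1 entirely: with both bounds None, the whole
    // cache for that sequence id is cleared.
    ctx.clear_kv_cache_seq(1, None, None);
}
```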
impl LlamaContext<'_>
pub fn sample(&mut self, sampler: Sampler<'_>) -> LlamaToken
pub fn grammar_accept_token(
    &mut self,
    grammar: &mut LlamaGrammar,
    token: LlamaToken
)
Accept a token into the grammar.
pub fn sample_grammar(
    &mut self,
    llama_token_data_array: &mut LlamaTokenDataArray,
    llama_grammar: &LlamaGrammar
)
Perform grammar sampling.
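Grammar sampling and token acceptance are meant to be used as a pair: filter the candidates through the grammar, pick a token, then feed the choice back so the grammar state advances. A sketch under those assumptions, using only the signatures documented on this page (the greedy pick at the end is one possible choice, not the only one):

```rust
// Sketch: one step of grammar-constrained sampling.
fn sample_with_grammar(
    ctx: &mut LlamaContext<'_>,
    grammar: &mut LlamaGrammar,
    mut candidates: LlamaTokenDataArray,
) -> LlamaToken {
    // Mask out candidates the grammar cannot accept.
    ctx.sample_grammar(&mut candidates, grammar);
    // Choose a token from the filtered candidates (greedy here).
    let token = ctx.sample_token_greedy(candidates);
    // Advance the grammar state with the chosen token.
    ctx.grammar_accept_token(grammar, token);
    token
}
```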
pub fn sample_temp(
    &self,
    token_data: &mut LlamaTokenDataArray,
    temperature: f32
)
Modify [token_data] in place using temperature sampling.
Panics
- [temperature] is not between 0.0 and 1.0.
pub fn sample_token_greedy(&self, token_data: LlamaTokenDataArray) -> LlamaToken
pub fn sample_token_softmax(&self, token_data: &mut LlamaTokenDataArray)
Sorts candidate tokens by their logits in descending order and calculates probabilities from the logits.
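These sampling methods compose into a simple pipeline: temperature-scale the logits, softmax them into probabilities, then pick a token. A sketch, assuming `candidates` was built elsewhere (for example from candidates_ith); the temperature of 0.8 is an illustrative value:

```rust
// Sketch: temperature -> softmax -> greedy pick.
fn pick_token(ctx: &LlamaContext<'_>, mut candidates: LlamaTokenDataArray) -> LlamaToken {
    // Scale logits by temperature (panics outside 0.0..=1.0).
    ctx.sample_temp(&mut candidates, 0.8);
    // Sort by logit and compute probabilities.
    ctx.sample_token_softmax(&mut candidates);
    // Take the highest-probability token.
    ctx.sample_token_greedy(candidates)
}
```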
impl<'model> LlamaContext<'model>
pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError>
pub fn candidates_ith(
    &self,
    i: i32
) -> impl Iterator<Item = LlamaTokenData> + '_
pub fn get_logits_ith(&self, i: i32) -> &[f32]
Get the logits for the ith token in the context.
Panics
- i is greater than n_ctx
- n_vocab does not fit into a usize
- logit i is not initialized.
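A typical flow is to decode a batch and then read back logits (or candidates) for a position in that batch. A sketch assuming `ctx` and `batch` are already prepared; the index 0 is an illustrative choice, and note that get_logits_ith panics rather than returning an error if that position's logits were never computed:

```rust
// Sketch: decode a batch, then inspect the results for one position.
fn decode_and_inspect(
    ctx: &mut LlamaContext<'_>,
    batch: &mut LlamaBatch,
) -> Result<(), DecodeError> {
    ctx.decode(batch)?;
    // Raw logits for position 0 of this batch (panics if uninitialized).
    let _logits: &[f32] = ctx.get_logits_ith(0);
    // Or walk the per-token candidates for the same position instead.
    let _n_candidates = ctx.candidates_ith(0).count();
    Ok(())
}
```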
pub fn reset_timings(&mut self)
Reset the timings for the context.
pub fn timings(&mut self) -> LlamaTimings
Returns the timings for the context.
Trait Implementations§
impl Debug for LlamaContext<'_>
Auto Trait Implementations§
impl<'a> RefUnwindSafe for LlamaContext<'a>
impl<'a> !Send for LlamaContext<'a>
impl<'a> !Sync for LlamaContext<'a>
impl<'a> Unpin for LlamaContext<'a>
impl<'a> UnwindSafe for LlamaContext<'a>
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.