Struct LlamaBatch

Source

pub struct LlamaBatch<'tokens> {
    pub initialized_logits: Vec<i32>,
    pub llama_batch: llama_batch,
    /* private fields */
}

Expand description

A safe wrapper around llama_batch.

PartialEq is intentionally not implemented because the underlying llama_batch from the C API contains raw pointers whose address comparison would be meaningless.

Fields§

§initialized_logits: Vec<i32>

The logits that are initialized. Used by [LlamaContext] to ensure that only initialized logits are accessed.

§llama_batch: llama_batch

The underlying llama_batch from the C API.

Implementations§

Source §

impl<'tokens> LlamaBatch<'tokens>

Source

pub fn clear(&mut self)

Clear the batch. This does not free the memory associated with the batch, but it does reset the number of tokens to 0.

Source

pub fn add( &mut self, LlamaToken: LlamaToken, pos: llama_pos, seq_ids: &[i32], logits: bool, ) -> Result<(), BatchAddError>

Add a token to the batch for the sequences seq_ids at position pos. If logits is true, the token's logits will be initialized and can be read after the next decode.

§Errors

Returns an error if there is insufficient space in the buffer or if integer conversions fail.

Source

pub fn add_sequence( &mut self, tokens: &[LlamaToken], seq_id: i32, logits_all: bool, ) -> Result<(), BatchAddError>

Add a sequence of tokens to the batch for the given sequence id. If logits_all is true, the logits of every token will be initialized and can be read after the next decode.

Either way the last token in the sequence will have its logits set to true.

§Errors

Returns an error if there is insufficient space in the buffer or if integer conversions fail.

Source

pub fn new(n_tokens: usize, n_seq_max: i32) -> Result<Self, BatchAddError>

Create a new LlamaBatch that can contain up to n_tokens tokens.

§Arguments

n_tokens: the maximum number of tokens that can be added to the batch
n_seq_max: the maximum number of sequences that can be added to the batch (generally 1 unless you know what you are doing)

§Errors

Returns an error if n_tokens exceeds i32::MAX.

Source

pub fn get_one(tokens: &'tokens [LlamaToken]) -> Result<Self, BatchAddError>

Wrapper around llama_batch_get_one: returns a batch for a single sequence of tokens.

NOTE: this is a helper function to facilitate transition to the new batch API

§Errors

Returns an error if the provided token buffer is empty or if integer conversions fail.

Source

pub const fn n_tokens(&self) -> i32

Returns the number of tokens in the batch.

Trait Implementations§

Source §

impl<'tokens> Debug for LlamaBatch<'tokens>

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl Drop for LlamaBatch<'_>

Source §

fn drop(&mut self)

Drops the LlamaBatch.

let batch = LlamaBatch::new(512, 1)?;
// frees the memory associated with the batch. (allocated by llama.cpp)
drop(batch);

Auto Trait Implementations§

§

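impl<'tokens> Freeze for LlamaBatch<'tokens>

§

impl<'tokens> RefUnwindSafe for LlamaBatch<'tokens>

§

impl<'tokens> !Send for LlamaBatch<'tokens>

§

impl<'tokens> !Sync for LlamaBatch<'tokens>

§

impl<'tokens> Unpin for LlamaBatch<'tokens>

§

impl<'tokens> UnsafeUnpin for LlamaBatch<'tokens>

§

impl<'tokens> UnwindSafe for LlamaBatch<'tokens>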

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Struct LlamaBatch Copy item path

Fields§

Implementations§

impl<'tokens> LlamaBatch<'tokens>

pub fn clear(&mut self)

pub fn add( &mut self, LlamaToken: LlamaToken, pos: llama_pos, seq_ids: &[i32], logits: bool, ) -> Result<(), BatchAddError>

§Errors

pub fn add_sequence( &mut self, tokens: &[LlamaToken], seq_id: i32, logits_all: bool, ) -> Result<(), BatchAddError>

§Errors

pub fn new(n_tokens: usize, n_seq_max: i32) -> Result<Self, BatchAddError>

§Arguments

§Errors

pub fn get_one(tokens: &'tokens [LlamaToken]) -> Result<Self, BatchAddError>

§Errors

pub const fn n_tokens(&self) -> i32

Trait Implementations§

impl<'tokens> Debug for LlamaBatch<'tokens>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Drop for LlamaBatch<'_>

fn drop(&mut self)

Auto Trait Implementations§

impl<'tokens> Freeze for LlamaBatch<'tokens>

impl<'tokens> RefUnwindSafe for LlamaBatch<'tokens>

impl<'tokens> !Send for LlamaBatch<'tokens>

impl<'tokens> !Sync for LlamaBatch<'tokens>

impl<'tokens> Unpin for LlamaBatch<'tokens>

impl<'tokens> UnsafeUnpin for LlamaBatch<'tokens>

impl<'tokens> UnwindSafe for LlamaBatch<'tokens>

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct LlamaBatch

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,