pub struct Cache { /* private fields */ }
Abstraction over cosine and sine tables, kv-caching and attention masking.
Implementations§
impl Cache
impl Cache
pub fn new(
use_kv_cache: bool,
dtype: DType,
config: &Config,
device: &Device,
) -> Result<Self>
pub fn new( use_kv_cache: bool, dtype: DType, config: &Config, device: &Device, ) -> Result<Self>
Creates a new cache instance with the provided configuration.
Set use_kv_cache to false to disable kv-caching.
pub fn with_kv_cache(&self) -> bool
pub fn with_kv_cache(&self) -> bool
Return true if kv-caching is enabled.
pub fn cosine(
&self,
index_pos: usize,
seq_len: usize,
device: &Device,
) -> Result<Tensor>
pub fn cosine( &self, index_pos: usize, seq_len: usize, device: &Device, ) -> Result<Tensor>
Return the cached cosine value for the given position and sequence length.
When device differs from the cache’s own device, the result is copied
to that device (enables multi-GPU workers).
pub fn sine(
&self,
index_pos: usize,
seq_len: usize,
device: &Device,
) -> Result<Tensor>
pub fn sine( &self, index_pos: usize, seq_len: usize, device: &Device, ) -> Result<Tensor>
Return the cached sine value for the given position and sequence length.
pub fn mask(&mut self, seq_len: usize, device: &Device) -> Result<Tensor>
pub fn mask(&mut self, seq_len: usize, device: &Device) -> Result<Tensor>
Get the attention mask for the given sequence length.
pub fn process_kv(
&mut self,
block_idx: usize,
k: Tensor,
v: Tensor,
) -> Result<(Tensor, Tensor)>
pub fn process_kv( &mut self, block_idx: usize, k: Tensor, v: Tensor, ) -> Result<(Tensor, Tensor)>
Process the input k and v by either generating their cache entry or applying a previously cached one.
pub fn get_recurrent_state(&self, block_idx: usize) -> Option<&Tensor>
pub fn get_recurrent_state(&self, block_idx: usize) -> Option<&Tensor>
Get the recurrent state for a linear attention layer.
pub fn set_recurrent_state(&mut self, block_idx: usize, state: Tensor)
pub fn set_recurrent_state(&mut self, block_idx: usize, state: Tensor)
Set the recurrent state for a linear attention layer.
pub fn get_conv_state(&self, block_idx: usize) -> Option<&Tensor>
pub fn get_conv_state(&self, block_idx: usize) -> Option<&Tensor>
Get the conv state for a linear attention layer.
pub fn set_conv_state(&mut self, block_idx: usize, state: Tensor)
pub fn set_conv_state(&mut self, block_idx: usize, state: Tensor)
Set the conv state for a linear attention layer.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for Cache
impl !RefUnwindSafe for Cache
impl Send for Cache
impl Sync for Cache
impl Unpin for Cache
impl UnsafeUnpin for Cache
impl !UnwindSafe for Cache
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> Instrument for T
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more