pub struct KvCache { /* private fields */ }
Per-layer KV cache storing FP32 key and value vectors.
Implementations§
Source§impl KvCache
impl KvCache
Sourcepub fn new(
num_layers: usize,
num_kv_heads: usize,
head_dim: usize,
max_seq_len: usize,
) -> Self
pub fn new( num_layers: usize, num_kv_heads: usize, head_dim: usize, max_seq_len: usize, ) -> Self
Create a new KV cache.
Sourcepub fn max_seq_len(&self) -> usize
pub fn max_seq_len(&self) -> usize
Maximum sequence length.
Sourcepub fn store_key(&mut self, layer: usize, head: usize, pos: usize, key: &[f32])
pub fn store_key(&mut self, layer: usize, head: usize, pos: usize, key: &[f32])
Store a key vector for a specific layer, head, and position.
Sourcepub fn store_value(
&mut self,
layer: usize,
head: usize,
pos: usize,
value: &[f32],
)
pub fn store_value( &mut self, layer: usize, head: usize, pos: usize, value: &[f32], )
Store a value vector for a specific layer, head, and position.
Sourcepub fn keys_for(&self, layer: usize, head: usize, seq_len: usize) -> &[f32]
pub fn keys_for(&self, layer: usize, head: usize, seq_len: usize) -> &[f32]
Get all cached keys for a layer and head up to seq_len.
Returns a slice of [seq_len × head_dim] in row-major order.
Sourcepub fn values_for(&self, layer: usize, head: usize, seq_len: usize) -> &[f32]
pub fn values_for(&self, layer: usize, head: usize, seq_len: usize) -> &[f32]
Get all cached values for a layer and head up to seq_len.
Sourcepub fn memory_bytes(&self) -> usize
pub fn memory_bytes(&self) -> usize
Total memory used by this cache in bytes.
Sourcepub fn utilization_ratio(&self) -> f64
pub fn utilization_ratio(&self) -> f64
Utilization ratio: fraction of cache capacity currently used.
Returns a value in [0.0, 1.0].
Sourcepub fn num_layers(&self) -> usize
pub fn num_layers(&self) -> usize
Number of layers in this cache.
Sourcepub fn num_kv_heads(&self) -> usize
pub fn num_kv_heads(&self) -> usize
Number of KV heads per layer.
Sourcepub fn set_seq_len(&mut self, n: usize)
pub fn set_seq_len(&mut self, n: usize)
Manually set the cached sequence length.
Used by the prefix-cache integration when restoring previously
computed KV blocks: after inject_block writes
the block contents, the consumer must call this to advertise the
number of valid positions to subsequent attention computations.
n is clamped to max_seq_len.
Sourcepub fn extract_block(
&self,
layer: usize,
start_pos: usize,
block_size: usize,
) -> (Vec<f32>, Vec<f32>)
pub fn extract_block( &self, layer: usize, start_pos: usize, block_size: usize, ) -> (Vec<f32>, Vec<f32>)
Extract one prefix-cache block worth of KV for a single layer.
Reads block_size consecutive positions starting at start_pos for
every KV head in layer and returns them in [head][pos_in_block][dim]
order, packed as a flat Vec<f32> of length
num_kv_heads * block_size * head_dim.
Mirrors the layout used by crate::prefix_cache::CacheBlock.
Returns (keys, values). If the requested range exceeds
max_seq_len, the trailing positions are returned as zeros.
Sourcepub fn inject_block(
&mut self,
layer: usize,
start_pos: usize,
block_size: usize,
keys: &[f32],
values: &[f32],
)
pub fn inject_block( &mut self, layer: usize, start_pos: usize, block_size: usize, keys: &[f32], values: &[f32], )
Inject a previously extracted block back into the cache for a single layer.
keys and values must have the same [head][pos_in_block][dim]
layout produced by extract_block; they are
expected to be of length num_kv_heads * block_size * head_dim.
Positions outside max_seq_len are silently skipped.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for KvCache
impl RefUnwindSafe for KvCache
impl Send for KvCache
impl Sync for KvCache
impl Unpin for KvCache
impl UnsafeUnpin for KvCache
impl UnwindSafe for KvCache
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more