pub struct PagedKvCache { /* private fields */ }
Expand description
Paged KV cache for efficient memory management.
Based on vLLM’s PagedAttention algorithm. Manages KV cache memory using fixed-size blocks to prevent fragmentation and enable efficient memory sharing.
Implementations§
Source§impl PagedKvCache
impl PagedKvCache
Sourcepub fn new(
num_blocks: usize,
block_size: usize,
num_heads: usize,
head_dim: usize,
) -> Self
pub fn new( num_blocks: usize, block_size: usize, num_heads: usize, head_dim: usize, ) -> Self
Create a new PagedKvCache.
§Arguments
- num_blocks: Total number of physical blocks
- block_size: Tokens per block
- num_heads: Number of attention heads
- head_dim: Dimension of each head
Sourcepub fn with_eviction_strategy(self, strategy: EvictionStrategy) -> Self
pub fn with_eviction_strategy(self, strategy: EvictionStrategy) -> Self
Set eviction strategy.
Sourcepub fn with_eviction_threshold(self, threshold: f64) -> Self
pub fn with_eviction_threshold(self, threshold: f64) -> Self
Set eviction threshold (0.0-1.0).
Sourcepub fn block_size(&self) -> usize
pub fn block_size(&self) -> usize
Get block size.
Sourcepub fn total_blocks(&self) -> usize
pub fn total_blocks(&self) -> usize
Get total number of blocks.
Sourcepub fn free_block_count(&self) -> usize
pub fn free_block_count(&self) -> usize
Get number of free blocks.
Sourcepub fn used_block_count(&self) -> usize
pub fn used_block_count(&self) -> usize
Get number of used blocks.
Sourcepub fn utilization(&self) -> f64
pub fn utilization(&self) -> f64
Memory utilization as a fraction (0.0-1.0).
Sourcepub fn block_memory_bytes(&self) -> usize
pub fn block_memory_bytes(&self) -> usize
Calculate memory for a block in bytes.
Sourcepub fn total_memory_bytes(&self) -> usize
pub fn total_memory_bytes(&self) -> usize
Total memory capacity in bytes.
Sourcepub fn used_memory_bytes(&self) -> usize
pub fn used_memory_bytes(&self) -> usize
Used memory in bytes.
Sourcepub fn needs_eviction(&self) -> bool
pub fn needs_eviction(&self) -> bool
Check if eviction is needed.
Sourcepub fn num_sequences(&self) -> usize
pub fn num_sequences(&self) -> usize
Get number of active sequences.
Sourcepub fn get_sequence(&self, seq_id: SeqId) -> Option<&SequenceInfo>
pub fn get_sequence(&self, seq_id: SeqId) -> Option<&SequenceInfo>
Get sequence info.
Sourcepub fn stats(&self) -> &CacheStats
pub fn stats(&self) -> &CacheStats
Get cache statistics.
Sourcepub fn eviction_strategy(&self) -> &EvictionStrategy
pub fn eviction_strategy(&self) -> &EvictionStrategy
Get eviction strategy.
Sourcepub fn allocate(
&mut self,
seq_id: SeqId,
num_tokens: usize,
) -> PagedKvResult<()>
pub fn allocate( &mut self, seq_id: SeqId, num_tokens: usize, ) -> PagedKvResult<()>
Allocate blocks for a new sequence.
Sourcepub fn append(
&mut self,
seq_id: SeqId,
num_new_tokens: usize,
) -> PagedKvResult<()>
pub fn append( &mut self, seq_id: SeqId, num_new_tokens: usize, ) -> PagedKvResult<()>
Append tokens to an existing sequence.
Sourcepub fn free(&mut self, seq_id: SeqId) -> PagedKvResult<()>
pub fn free(&mut self, seq_id: SeqId) -> PagedKvResult<()>
Free all blocks for a sequence.
Sourcepub fn fork(&mut self, src_seq: SeqId, dst_seq: SeqId) -> PagedKvResult<()>
pub fn fork(&mut self, src_seq: SeqId, dst_seq: SeqId) -> PagedKvResult<()>
Copy-on-write fork for beam search.
Creates a new sequence that shares blocks with the source sequence. Blocks are only copied when modified (copy-on-write).
Sourcepub fn select_eviction_target(&self) -> Option<SeqId>
pub fn select_eviction_target(&self) -> Option<SeqId>
Select sequence to evict based on strategy.
Sourcepub fn evict(&mut self) -> PagedKvResult<SeqId>
pub fn evict(&mut self) -> PagedKvResult<SeqId>
Evict a sequence to free memory.
Sourcepub fn evict_to_threshold(
&mut self,
target_util: f64,
) -> PagedKvResult<Vec<SeqId>>
pub fn evict_to_threshold( &mut self, target_util: f64, ) -> PagedKvResult<Vec<SeqId>>
Evict sequences until memory utilization is below the target_util threshold.
Sourcepub fn apply_streaming_llm(
&mut self,
seq_id: SeqId,
sink_tokens: usize,
window_tokens: usize,
) -> PagedKvResult<usize>
pub fn apply_streaming_llm( &mut self, seq_id: SeqId, sink_tokens: usize, window_tokens: usize, ) -> PagedKvResult<usize>
Apply StreamingLLM eviction to a sequence.
Keeps sink tokens at the beginning and a recent window at the end, evicting middle tokens.
Sourcepub fn sequence_ids(&self) -> Vec<SeqId>
pub fn sequence_ids(&self) -> Vec<SeqId>
Get all sequence IDs.
Trait Implementations§
Source§impl Debug for PagedKvCache
impl Debug for PagedKvCache
Auto Trait Implementations§
impl Freeze for PagedKvCache
impl RefUnwindSafe for PagedKvCache
impl Send for PagedKvCache
impl Sync for PagedKvCache
impl Unpin for PagedKvCache
impl UnsafeUnpin for PagedKvCache
impl UnwindSafe for PagedKvCache
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> FmtForward for T
impl<T> FmtForward for T
Source§fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
Causes self to use its Binary implementation when Debug-formatted.
Source§fn fmt_display(self) -> FmtDisplay<Self>where
Self: Display,
fn fmt_display(self) -> FmtDisplay<Self>where
Self: Display,
Causes self to use its Display implementation when Debug-formatted.
Source§fn fmt_lower_exp(self) -> FmtLowerExp<Self>where
Self: LowerExp,
fn fmt_lower_exp(self) -> FmtLowerExp<Self>where
Self: LowerExp,
Causes self to use its LowerExp implementation when Debug-formatted.
Source§fn fmt_lower_hex(self) -> FmtLowerHex<Self>where
Self: LowerHex,
fn fmt_lower_hex(self) -> FmtLowerHex<Self>where
Self: LowerHex,
Causes self to use its LowerHex implementation when Debug-formatted.
Source§fn fmt_octal(self) -> FmtOctal<Self>where
Self: Octal,
fn fmt_octal(self) -> FmtOctal<Self>where
Self: Octal,
Causes self to use its Octal implementation when Debug-formatted.
Source§fn fmt_pointer(self) -> FmtPointer<Self>where
Self: Pointer,
fn fmt_pointer(self) -> FmtPointer<Self>where
Self: Pointer,
Causes self to use its Pointer implementation when Debug-formatted.
Source§fn fmt_upper_exp(self) -> FmtUpperExp<Self>where
Self: UpperExp,
fn fmt_upper_exp(self) -> FmtUpperExp<Self>where
Self: UpperExp,
Causes self to use its UpperExp implementation when Debug-formatted.
Source§fn fmt_upper_hex(self) -> FmtUpperHex<Self>where
Self: UpperHex,
fn fmt_upper_hex(self) -> FmtUpperHex<Self>where
Self: UpperHex,
Causes self to use its UpperHex implementation when Debug-formatted.
Source§impl<T> Pipe for Twhere
T: ?Sized,
impl<T> Pipe for Twhere
T: ?Sized,
Source§fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
Source§fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
Borrows self and passes that borrow into the pipe function. Read more
Source§fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> Rwhere
R: 'a,
fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> Rwhere
R: 'a,
Mutably borrows self and passes that borrow into the pipe function. Read more
Source§fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
Source§fn pipe_borrow_mut<'a, B, R>(
&'a mut self,
func: impl FnOnce(&'a mut B) -> R,
) -> R
fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
Source§fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
Borrows self, then passes self.as_ref() into the pipe function.
Source§fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
Mutably borrows self, then passes self.as_mut() into the pipe function.
Source§fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
Borrows self, then passes self.deref() into the pipe function.
Source§impl<T> Tap for T
impl<T> Tap for T
Source§fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
Immutable access to the Borrow<B> of a value. Read more
Source§fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
Mutable access to the BorrowMut<B> of a value. Read more
Source§fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
Immutable access to the AsRef<R> view of a value. Read more
Source§fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
Mutable access to the AsMut<R> view of a value. Read more
Source§fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
Immutable access to the Deref::Target of a value. Read more
Source§fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
Mutable access to the Deref::Target of a value. Read more
Source§fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
Calls .tap() only in debug builds, and is erased in release builds.
Source§fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
Calls .tap_mut() only in debug builds, and is erased in release builds.
Source§fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
Calls .tap_borrow() only in debug builds, and is erased in release builds.
Source§fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
Calls .tap_borrow_mut() only in debug builds, and is erased in release builds.
Source§fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
Calls .tap_ref() only in debug builds, and is erased in release builds.
Source§fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
Calls .tap_ref_mut() only in debug builds, and is erased in release builds.
Source§fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
Calls .tap_deref() only in debug builds, and is erased in release builds.
Source§impl<T> ToCompactString for Twhere
T: Display,
impl<T> ToCompactString for Twhere
T: Display,
Source§fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
Fallible version of ToCompactString::to_compact_string() Read more
Source§fn to_compact_string(&self) -> CompactString
fn to_compact_string(&self) -> CompactString
Converts the given value to a CompactString. Read more