Skip to main content

KvCacheHandle

Trait KvCacheHandle 

Source
pub trait KvCacheHandle:
    Send
    + Sync
    + Debug {
    // Required methods
    fn block_table(&self) -> &BlockTable;
    fn block_table_mut(&mut self) -> &mut BlockTable;
    fn as_any(&self) -> &dyn Any;
    fn device(&self) -> Device;
    fn num_layers(&self) -> usize;
    fn num_heads(&self) -> usize;
    fn head_dim(&self) -> usize;
    fn key_cache(&self, layer: usize) -> Result<Option<TensorRef>>;
    fn value_cache(&self, layer: usize) -> Result<Option<TensorRef>>;
    fn clone_handle(&self) -> Result<Arc<dyn KvCacheHandle>>;
    fn stats(&self) -> CacheHandleStats;
    fn is_valid(&self) -> bool;
    fn cache_id(&self) -> String;

    // Provided methods
    fn num_tokens(&self) -> usize { ... }
    fn kv_cache(
        &self,
        layer: usize,
    ) -> Result<(Option<TensorRef>, Option<TensorRef>)> { ... }
}
Expand description

KV cache handle providing access to cached key-value states

Required Methods§

Source

fn block_table(&self) -> &BlockTable

Get block table for this cache

Source

fn block_table_mut(&mut self) -> &mut BlockTable

Get mutable block table (for extending)

Source

fn as_any(&self) -> &dyn Any

Downcast support for backend-specific handles

Source

fn device(&self) -> Device

Get device where cache resides

Source

fn num_layers(&self) -> usize

Get number of layers cached

Source

fn num_heads(&self) -> usize

Get number of attention heads

Source

fn head_dim(&self) -> usize

Get head dimension

Source

fn key_cache(&self, layer: usize) -> Result<Option<TensorRef>>

Get key cache for specific layer (returns tensor reference)

Source

fn value_cache(&self, layer: usize) -> Result<Option<TensorRef>>

Get value cache for specific layer

Source

fn clone_handle(&self) -> Result<Arc<dyn KvCacheHandle>>

Clone handle (creates new reference, not deep copy)

Source

fn stats(&self) -> CacheHandleStats

Get cache statistics

Source

fn is_valid(&self) -> bool

Check if cache is valid and accessible

Source

fn cache_id(&self) -> String

Get unique identifier for this cache instance

Provided Methods§

Source

fn num_tokens(&self) -> usize

Get number of tokens stored in cache

Source

fn kv_cache(&self, layer: usize) -> Result<(Option<TensorRef>, Option<TensorRef>)>

Get both key and value caches for layer

Implementors§