pub trait KvCacheHandleInterface:
Send
+ Sync
+ Debug {
Show 15 methods
// Required methods
fn block_table(&self) -> &BlockTable;
fn block_table_mut(&mut self) -> &mut BlockTable;
fn as_any(&self) -> &(dyn Any + 'static);
fn device(&self) -> Device;
fn num_layers(&self) -> usize;
fn num_heads(&self) -> usize;
fn head_dim(&self) -> usize;
fn key_cache(
&self,
layer: usize,
) -> Result<Option<Arc<dyn TensorLike>>, FerrumError>;
fn value_cache(
&self,
layer: usize,
) -> Result<Option<Arc<dyn TensorLike>>, FerrumError>;
fn clone_handle(&self) -> Result<Arc<dyn KvCacheHandle>, FerrumError>;
fn stats(&self) -> CacheHandleStats;
fn is_valid(&self) -> bool;
fn cache_id(&self) -> String;
// Provided methods
fn num_tokens(&self) -> usize { ... }
fn kv_cache(
&self,
layer: usize,
) -> Result<(Option<Arc<dyn TensorLike>>, Option<Arc<dyn TensorLike>>), FerrumError> { ... }
}
Expand description
KV cache handle providing access to cached key-value states
Required Methods§
fn block_table(&self) -> &BlockTable
Get block table for this cache
fn block_table_mut(&mut self) -> &mut BlockTable
Get mutable block table (for extending)
fn num_layers(&self) -> usize
Get number of layers cached
fn key_cache(&self, layer: usize) -> Result<Option<Arc<dyn TensorLike>>, FerrumError>
Get key cache for specific layer (returns tensor reference)
fn value_cache(&self, layer: usize) -> Result<Option<Arc<dyn TensorLike>>, FerrumError>
Get value cache for specific layer
fn clone_handle(&self) -> Result<Arc<dyn KvCacheHandle>, FerrumError>
Clone handle (creates new reference, not deep copy)
fn stats(&self) -> CacheHandleStats
Get cache statistics
Provided Methods§
fn num_tokens(&self) -> usize
Get number of tokens stored in cache
fn kv_cache(&self, layer: usize) -> Result<(Option<Arc<dyn TensorLike>>, Option<Arc<dyn TensorLike>>), FerrumError>
Get both key and value caches for layer
Dyn Compatibility§
This trait is dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety".