Skip to main content

AdvancedKvCacheManager

Trait AdvancedKvCacheManager 

Source
/// Advanced KV cache capabilities.
///
/// Extends [`KvCacheManager`]: every implementor must also implement that
/// supertrait. Because the trait declares `async fn` methods it is not dyn
/// compatible, so it cannot be used as `dyn AdvancedKvCacheManager`; use it
/// through generics or `impl AdvancedKvCacheManager` instead.
///
/// All fallible operations return `Result<()>`; the concrete error conditions
/// are defined by the implementor and are not visible from this declaration.
pub trait AdvancedKvCacheManager: KvCacheManager {
    // Required methods

    /// Enable prefix caching for common prompt prefixes.
    ///
    /// `config` carries the prefix-cache settings; the fields of
    /// `PrefixCacheConfig` are declared elsewhere.
    async fn enable_prefix_caching(
        &self,
        config: PrefixCacheConfig,
    ) -> Result<()>;

    /// Share cache blocks between compatible sequences.
    ///
    /// NOTE(review): presumably the first `shared_tokens` tokens cached for
    /// `source` are made available to `target` -- confirm the direction and
    /// what "compatible" means against an implementor.
    async fn share_prefix(
        &self,
        source: RequestId,
        target: RequestId,
        shared_tokens: usize,
    ) -> Result<()>;

    /// Swap the cache for `request_id` from GPU to CPU to free GPU memory.
    async fn swap_out(&self, request_id: RequestId) -> Result<()>;

    /// Swap the cache for `request_id` from CPU back to GPU.
    async fn swap_in(&self, request_id: RequestId) -> Result<()>;

    /// Compress the cache for `request_id` to reduce memory usage.
    ///
    /// NOTE(review): the valid range and direction of `compression_ratio`
    /// (e.g. whether a larger value means stronger compression) are not
    /// specified here -- confirm against an implementor before relying on it.
    async fn compress_cache(
        &self,
        request_id: RequestId,
        compression_ratio: f32,
    ) -> Result<()>;

    /// Get cache compression statistics.
    ///
    /// Unlike the other methods this one is synchronous and infallible: it
    /// returns a `CompressionStats` value directly.
    fn compression_stats(&self) -> CompressionStats;
}
Expand description

Advanced KV cache capabilities

Required Methods§

Source

async fn enable_prefix_caching(&self, config: PrefixCacheConfig) -> Result<()>

Enable prefix caching for common prompt prefixes

Source

async fn share_prefix(&self, source: RequestId, target: RequestId, shared_tokens: usize) -> Result<()>

Share cache blocks between compatible sequences

Source

async fn swap_out(&self, request_id: RequestId) -> Result<()>

Swap cache from GPU to CPU to free GPU memory

Source

async fn swap_in(&self, request_id: RequestId) -> Result<()>

Swap cache from CPU back to GPU

Source

async fn compress_cache(&self, request_id: RequestId, compression_ratio: f32) -> Result<()>

Compress cache to reduce memory usage

Source

fn compression_stats(&self) -> CompressionStats

Get cache compression statistics

Dyn Compatibility§

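This trait is not dyn compatible: it declares `async fn` methods, and a trait with `async fn` methods cannot be used as a `dyn` trait object.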

In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.

Implementors§