Skip to main content

AdvancedKvCacheManager

Trait AdvancedKvCacheManager 

Source
/// Advanced KV cache capabilities.
///
/// Extends [`KvCacheManager`] with prefix caching, prefix sharing between
/// requests, GPU/CPU swapping, and cache compression.
///
/// This trait is not dyn compatible ("object safe" in older Rust
/// terminology): its `async fn` methods alone rule out use as
/// `dyn AdvancedKvCacheManager`, so consume it through generics.
pub trait AdvancedKvCacheManager: KvCacheManager {
    // Required methods
    /// Enable prefix caching for common prompt prefixes.
    ///
    /// `config` supplies the prefix-cache settings; its fields are defined
    /// on [`PrefixCacheConfig`] and are not visible from this declaration.
    async fn enable_prefix_caching(
        &self,
        config: PrefixCacheConfig,
    ) -> Result<()>;
    /// Share cache blocks between compatible sequences.
    ///
    /// NOTE(review): presumably `source` is the request whose blocks are
    /// reused by `target`, and `shared_tokens` is the length of the common
    /// prefix in tokens — confirm against an implementor. What makes two
    /// sequences "compatible" is not specified here.
    async fn share_prefix(
        &self,
        source: RequestId,
        target: RequestId,
        shared_tokens: usize,
    ) -> Result<()>;
    /// Swap cache from GPU to CPU to free GPU memory.
    ///
    /// NOTE(review): presumably acts on the cache belonging to
    /// `request_id` — confirm.
    async fn swap_out(&self, request_id: RequestId) -> Result<()>;
    /// Swap cache from CPU back to GPU.
    ///
    /// NOTE(review): presumably the inverse of [`Self::swap_out`] for the
    /// same `request_id` — confirm.
    async fn swap_in(&self, request_id: RequestId) -> Result<()>;
    /// Compress cache to reduce memory usage.
    ///
    /// NOTE(review): the meaning and valid range of `compression_ratio`
    /// (e.g. target fraction vs. reduction factor) are not specified in
    /// this declaration — confirm before relying on a particular scale.
    async fn compress_cache(
        &self,
        request_id: RequestId,
        compression_ratio: f32,
    ) -> Result<()>;
    /// Get cache compression statistics.
    ///
    /// Synchronous, unlike the other methods of this trait.
    fn compression_stats(&self) -> CompressionStats;
}
Expand description

Advanced KV cache capabilities

Required Methods§

Source

async fn enable_prefix_caching(&self, config: PrefixCacheConfig) -> Result<()>

Enable prefix caching for common prompt prefixes

Source

async fn share_prefix( &self, source: RequestId, target: RequestId, shared_tokens: usize, ) -> Result<()>

Share cache blocks between compatible sequences

Source

async fn swap_out(&self, request_id: RequestId) -> Result<()>

Swap cache from GPU to CPU to free GPU memory

Source

async fn swap_in(&self, request_id: RequestId) -> Result<()>

Swap cache from CPU back to GPU

Source

async fn compress_cache( &self, request_id: RequestId, compression_ratio: f32, ) -> Result<()>

Compress cache to reduce memory usage

Source

fn compression_stats(&self) -> CompressionStats

Get cache compression statistics

Dyn Compatibility§

This trait is not dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety".

Implementors§