pub trait AdvancedKvCacheManager: KvCacheManager {
// Required methods
async fn enable_prefix_caching(
&self,
config: PrefixCacheConfig,
) -> Result<()>;
async fn share_prefix(
&self,
source: RequestId,
target: RequestId,
shared_tokens: usize,
) -> Result<()>;
async fn swap_out(&self, request_id: RequestId) -> Result<()>;
async fn swap_in(&self, request_id: RequestId) -> Result<()>;
async fn compress_cache(
&self,
request_id: RequestId,
compression_ratio: f32,
) -> Result<()>;
fn compression_stats(&self) -> CompressionStats;
}
Expand description
Advanced KV cache capabilities
Required Methods§
async fn enable_prefix_caching(&self, config: PrefixCacheConfig) -> Result<()>
Enable prefix caching for common prompt prefixes
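async fn share_prefix(&self, source: RequestId, target: RequestId, shared_tokens: usize) -> Result<()>
Share cache blocks between compatible sequences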
async fn swap_out(&self, request_id: RequestId) -> Result<()>
Swap cache from GPU to CPU to free GPU memory
async fn swap_in(&self, request_id: RequestId) -> Result<()>
async fn compress_cache(&self, request_id: RequestId, compression_ratio: f32) -> Result<()>
Compress cache to reduce memory usage
fn compression_stats(&self) -> CompressionStats
Get cache compression statistics
Dyn Compatibility§
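This trait is not dyn compatible: it declares `async fn` methods, so it cannot be used as a trait object (`dyn AdvancedKvCacheManager`).
In older versions of Rust, dyn compatibility was called "object safety".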