Skip to main content

KvCacheManager

Trait KvCacheManager 

Source
/// KV cache manager responsible for cache allocation and lifecycle management.
///
/// Implementors must be `Send + Sync` (supertrait bounds on the trait).
///
/// NOTE(review): the methods returning `Pin<Box<dyn Future<...> + Send + 'async_trait>>`
/// with `'life0` / `'life1` / `'async_trait` lifetimes look like the expansion of
/// `async fn` declarations produced by the `async_trait` macro — confirm against the
/// original trait source before implementing these signatures by hand.
pub trait KvCacheManager: Send + Sync {
    // Required methods

    /// Allocates a cache for a new sequence described by `request`.
    ///
    /// The returned future resolves to a shared handle (`Arc<dyn KvCacheHandle>`)
    /// on success, or to the `Err` variant of `Result` on failure.
    fn allocate<'life0, 'life1, 'async_trait>(
        &'life0 self,
        request: &'life1 AllocationRequest,
    ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn KvCacheHandle>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait;

    /// Extends an existing cache so it can accommodate more tokens.
    ///
    /// `handle` is borrowed mutably for the lifetime of the returned future;
    /// `additional_tokens` is the number of extra tokens to make room for.
    /// The future resolves to `Ok(())` on success.
    fn extend<'life0, 'life1, 'async_trait>(
        &'life0 self,
        handle: &'life1 mut dyn KvCacheHandle,
        additional_tokens: usize,
    ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait;

    /// Deallocates the cache associated with `request_id`.
    ///
    /// After deallocation the corresponding handle becomes invalid.
    fn deallocate<'life0, 'async_trait>(
        &'life0 self,
        request_id: RequestId,
    ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Reports whether the cache size requested by `request` can be allocated.
    ///
    /// This is a synchronous check; it returns a plain `bool` rather than a future.
    fn can_allocate(&self, request: &AllocationRequest) -> bool;

    /// Returns cache statistics as a `CacheManagerStats` value.
    fn stats(&self) -> CacheManagerStats;

    /// Forces garbage collection of unused caches.
    ///
    /// The returned future resolves to a `CacheGcStats` value on success.
    fn gc<'life0, 'async_trait>(
        &'life0 self,
    ) -> Pin<Box<dyn Future<Output = Result<CacheGcStats>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Sets the memory pressure callback.
    ///
    /// The callback is passed by value as a boxed `Fn(MemoryPressure)` that must be
    /// `Send + Sync`. It is installed through `&self`, so implementations presumably
    /// need interior mutability to store it — verify against the implementors.
    fn set_pressure_callback(
        &self,
        callback: Box<dyn Fn(MemoryPressure) + Send + Sync>,
    );

    /// Returns the handle for an existing request, or `None` if there is none
    /// for `request_id`.
    fn get_handle(
        &self,
        request_id: RequestId,
    ) -> Option<Arc<dyn KvCacheHandle>>;

    /// Lists all active cache handles, each paired with its `RequestId`.
    fn list_handles(&self) -> Vec<(RequestId, Arc<dyn KvCacheHandle>)>;
}
Expand description

KV cache manager for allocation and lifecycle management

Required Methods§

Source

fn allocate<'life0, 'life1, 'async_trait>( &'life0 self, request: &'life1 AllocationRequest, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn KvCacheHandle>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Allocate a cache for a new sequence

Source

fn extend<'life0, 'life1, 'async_trait>( &'life0 self, handle: &'life1 mut dyn KvCacheHandle, additional_tokens: usize, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Extend existing cache to accommodate more tokens

Source

fn deallocate<'life0, 'async_trait>( &'life0 self, request_id: RequestId, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Deallocate cache (handle becomes invalid)

Source

fn can_allocate(&self, request: &AllocationRequest) -> bool

Check whether the requested cache size can be allocated

Source

fn stats(&self) -> CacheManagerStats

Get cache statistics

Source

fn gc<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<CacheGcStats>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Force garbage collection of unused caches

Source

fn set_pressure_callback( &self, callback: Box<dyn Fn(MemoryPressure) + Send + Sync>, )

Set memory pressure callback

Source

fn get_handle(&self, request_id: RequestId) -> Option<Arc<dyn KvCacheHandle>>

Get the handle for an existing request, if one exists

Source

fn list_handles(&self) -> Vec<(RequestId, Arc<dyn KvCacheHandle>)>

List all active cache handles

Implementors§