/// KV cache manager for allocation and lifecycle management.
///
/// Implementors must be `Send + Sync`. The methods that return
/// `Pin<Box<dyn Future<...> + Send + 'async_trait>>` are asynchronous: the
/// returned future is `Send` and may borrow `self` (and any reference
/// arguments) for the `'async_trait` lifetime.
///
/// NOTE(review): the `'life0` / `'life1` / `'async_trait` lifetimes look like
/// the expansion of an `#[async_trait]` attribute — confirm against the
/// original source before editing these signatures by hand.
pub trait KvCacheManager: Send + Sync {
// Required methods
/// Allocate cache for a new sequence.
///
/// Takes the allocation `request` by reference and resolves to a shared
/// `Arc<dyn KvCacheHandle>` on success, or an error otherwise.
fn allocate<'life0, 'life1, 'async_trait>(
&'life0 self,
request: &'life1 AllocationRequest,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn KvCacheHandle>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
/// Extend an existing cache to accommodate more tokens.
///
/// `handle` is borrowed mutably for the lifetime of the returned future;
/// `additional_tokens` is the number of extra tokens to make room for.
/// Resolves to `Ok(())` on success.
fn extend<'life0, 'life1, 'async_trait>(
&'life0 self,
handle: &'life1 mut dyn KvCacheHandle,
additional_tokens: usize,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
/// Deallocate the cache identified by `request_id`.
///
/// The handle becomes invalid once the cache has been deallocated.
fn deallocate<'life0, 'async_trait>(
&'life0 self,
request_id: RequestId,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
/// Check whether the requested cache size can be allocated.
///
/// Synchronous; returns a plain `bool` rather than a future.
fn can_allocate(&self, request: &AllocationRequest) -> bool;
/// Get cache statistics.
fn stats(&self) -> CacheManagerStats;
/// Force garbage collection of unused caches.
///
/// Resolves to a `CacheGcStats` value on success.
fn gc<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<CacheGcStats>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
/// Set the memory pressure callback.
///
/// The callback is boxed, must be `Send + Sync`, and receives a
/// `MemoryPressure` value when invoked.
fn set_pressure_callback(
&self,
callback: Box<dyn Fn(MemoryPressure) + Send + Sync>,
);
/// Get the handle for an existing request.
///
/// Returns `None` when no handle exists for `request_id`.
fn get_handle(
&self,
request_id: RequestId,
) -> Option<Arc<dyn KvCacheHandle>>;
/// List all active cache handles, paired with their request ids.
fn list_handles(&self) -> Vec<(RequestId, Arc<dyn KvCacheHandle>)>;
}Expand description
KV cache manager for cache allocation and lifecycle management.
Required Methods
Source
fn allocate<'life0, 'life1, 'async_trait>(
&'life0 self,
request: &'life1 AllocationRequest,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn KvCacheHandle>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn allocate<'life0, 'life1, 'async_trait>(
&'life0 self,
request: &'life1 AllocationRequest,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn KvCacheHandle>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Allocate cache for a new sequence.
Source
fn extend<'life0, 'life1, 'async_trait>(
&'life0 self,
handle: &'life1 mut dyn KvCacheHandle,
additional_tokens: usize,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn extend<'life0, 'life1, 'async_trait>(
&'life0 self,
handle: &'life1 mut dyn KvCacheHandle,
additional_tokens: usize,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Extend an existing cache to accommodate more tokens.
Source
fn deallocate<'life0, 'async_trait>(
&'life0 self,
request_id: RequestId,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
fn deallocate<'life0, 'async_trait>(
&'life0 self,
request_id: RequestId,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Deallocate the cache (the handle becomes invalid).
Source
fn can_allocate(&self, request: &AllocationRequest) -> bool
fn can_allocate(&self, request: &AllocationRequest) -> bool
Check whether the requested cache size can be allocated.
Source
fn stats(&self) -> CacheManagerStats
fn stats(&self) -> CacheManagerStats
Get cache statistics.
Source
fn gc<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<CacheGcStats>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
fn gc<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<CacheGcStats>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Force garbage collection of unused caches.
Source
fn set_pressure_callback(
&self,
callback: Box<dyn Fn(MemoryPressure) + Send + Sync>,
)
fn set_pressure_callback(
&self,
callback: Box<dyn Fn(MemoryPressure) + Send + Sync>,
)
Set the memory pressure callback.
Source
fn get_handle(&self, request_id: RequestId) -> Option<Arc<dyn KvCacheHandle>>
fn get_handle(&self, request_id: RequestId) -> Option<Arc<dyn KvCacheHandle>>
Get the handle for an existing request, if one exists.
Source
fn list_handles(&self) -> Vec<(RequestId, Arc<dyn KvCacheHandle>)>
fn list_handles(&self) -> Vec<(RequestId, Arc<dyn KvCacheHandle>)>
List all active cache handles.