pub struct KVCacheManager {
pub num_prefix_cache_hits: u64,
pub num_prefix_cache_misses: u64,
pub hit_size_count: u64,
pub hit_size_sum: u64,
/* private fields */
}
Expand description
Manages KV cache blocks for all requests
Fields§
§num_prefix_cache_hits: u64
Metrics
§num_prefix_cache_misses: u64
§hit_size_count: u64
§hit_size_sum: u64
Implementations§
Source§impl KVCacheManager
impl KVCacheManager
Sourcepub fn new(
kv_cache_capacity: u64,
block_size: u32,
kv_cache_bytes_per_token: u64,
enable_prefix_caching: bool,
) -> Self
pub fn new( kv_cache_capacity: u64, block_size: u32, kv_cache_bytes_per_token: u64, enable_prefix_caching: bool, ) -> Self
Create a new KV cache manager
Sourcepub fn allocate_blocks(
&mut self,
request: &Request,
num_tokens: u32,
) -> Option<Vec<BlockId>>
pub fn allocate_blocks( &mut self, request: &Request, num_tokens: u32, ) -> Option<Vec<BlockId>>
Try to allocate blocks for a request
Returns Some(Vec<BlockId>) if the allocation succeeds, or None if it cannot be satisfied.
Sourcepub fn free_blocks(&mut self, block_ids: &[BlockId])
pub fn free_blocks(&mut self, block_ids: &[BlockId])
Free blocks from a request (due to preemption or completion)
Sourcepub fn num_free_blocks(&self) -> usize
pub fn num_free_blocks(&self) -> usize
Get number of free blocks
Sourcepub fn total_blocks(&self) -> usize
pub fn total_blocks(&self) -> usize
Get total number of blocks
Sourcepub fn utilization(&self) -> f64
pub fn utilization(&self) -> f64
Get cache utilization (0.0 to 1.0)
Sourcepub fn peek_prefix_cache(&mut self, request: &Request) -> u32
pub fn peek_prefix_cache(&mut self, request: &Request) -> u32
Check for prefix cache hits. Returns the number of tokens that can be served from the cache.
pub fn query_prefix_cache(&mut self, request: &Request) -> u32
Auto Trait Implementations§
impl Freeze for KVCacheManager
impl RefUnwindSafe for KVCacheManager
impl Send for KVCacheManager
impl Sync for KVCacheManager
impl Unpin for KVCacheManager
impl UnsafeUnpin for KVCacheManager
impl UnwindSafe for KVCacheManager
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more