pub struct KVCache { /* private fields */ }
Key-Value cache for efficient transformer inference.
Implementations§
impl KVCache
pub fn from_config(config: KVCacheConfig) -> Result<Self>
Create a KV-cache from a configuration.
pub fn config(&self) -> &KVCacheConfig
Get the configuration.
pub fn is_enabled(&self) -> bool
Check whether the cache is enabled.
pub fn init_layer(&mut self, layer_idx: usize, batch_size: usize) -> Result<()>
Initialize the cache for a layer.
pub fn update_layer(
    &mut self,
    layer_idx: usize,
    new_keys: Vec<f32>,
    new_values: Vec<f32>,
) -> Result<()>
Update the cache for a layer with new keys and values.
pub fn get_layer(&self, layer_idx: usize) -> Result<(&[f32], &[f32])>
Get the cached keys and values for a layer.
pub fn get_seq_len(&self, layer_idx: usize) -> Result<usize>
Get the sequence length for a layer.
pub fn clear_layer(&mut self, layer_idx: usize)
Clear the cache for a specific layer.
pub fn num_cached_layers(&self) -> usize
Get the number of cached layers.
pub fn current_memory_usage(&self) -> usize
Calculate the current memory usage.
pub fn current_memory_usage_mb(&self) -> f64
Calculate the current memory usage in MB.
pub fn stats(&self) -> CacheStats
Get cache statistics.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for KVCache
impl RefUnwindSafe for KVCache
impl Send for KVCache
impl Sync for KVCache
impl Unpin for KVCache
impl UnwindSafe for KVCache
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.