pub struct KVCacheConfig {
pub num_layers: usize,
pub num_heads: usize,
pub head_dim: usize,
pub max_seq_len: usize,
pub max_batch_size: usize,
pub enabled: bool,
}

Configuration for the KV-cache.
Fields

num_layers: usize — Number of layers in the model
num_heads: usize — Number of attention heads per layer
head_dim: usize — Dimension per attention head (d_k)
max_seq_len: usize — Maximum sequence length to cache
max_batch_size: usize — Maximum batch size
enabled: bool — Whether to enable the cache
Implementations

impl KVCacheConfig
pub fn new(num_layers: usize, num_heads: usize, head_dim: usize) -> Self
Create a new KV-cache configuration.
pub fn with_max_seq_len(self, max_seq_len: usize) -> Self
Set the maximum sequence length.
pub fn with_max_batch_size(self, max_batch_size: usize) -> Self
Set the maximum batch size.
pub fn with_enabled(self, enabled: bool) -> Self
Enable or disable the cache.
pub fn memory_usage(&self) -> usize
Calculate memory usage in bytes.
pub fn memory_usage_mb(&self) -> f64
Human-readable memory usage.
Trait Implementations

impl Clone for KVCacheConfig
fn clone(&self) -> KVCacheConfig
Returns a duplicate of the value.
fn clone_from(&mut self, source: &Self) (since 1.0.0)
Performs copy-assignment from source.

impl Debug for KVCacheConfig
impl<'de> Deserialize<'de> for KVCacheConfig
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer.
Auto Trait Implementations
impl Freeze for KVCacheConfig
impl RefUnwindSafe for KVCacheConfig
impl Send for KVCacheConfig
impl Sync for KVCacheConfig
impl Unpin for KVCacheConfig
impl UnwindSafe for KVCacheConfig
Blanket Implementations

impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.