pub fn kv_cache_bytes(seq_len: u32, model: &ModelConfig) -> f64
Expand description
Calculates the number of memory-transfer bytes for the KV cache at a given sequence length.

Formula: `kv_bytes = kv_cache_bytes_per_token * seq_len`
pub fn kv_cache_bytes(seq_len: u32, model: &ModelConfig) -> f64

Calculates the number of memory-transfer bytes for the KV cache at a given sequence length. Formula: `kv_bytes = kv_cache_bytes_per_token * seq_len`