pub struct ModelConfig {
pub name: String,
pub num_parameters: u64,
pub num_active_parameters: Option<u64>,
pub num_layers: u32,
pub hidden_dim: u32,
pub num_heads: u32,
pub num_kv_heads: Option<u32>,
pub max_seq_len: u32,
pub sliding_window: Option<u32>,
pub num_sliding_layers: Option<u32>,
pub kv_cache_bytes_per_token: u64,
}

Fields

name: String — Model name.
num_parameters: u64 — Total parameters in the model (all parameters, including inactive experts in MoE).
num_active_parameters: Option<u64> — Active parameters used during inference (for MoE models with sparse activation). If not specified, defaults to num_parameters (dense models).
num_layers: u32 — Number of transformer layers.
hidden_dim: u32 — Hidden dimension.
num_heads: u32 — Number of attention heads.
num_kv_heads: Option<u32> — Number of KV heads (for GQA/MQA). If not specified, defaults to num_heads (MHA).
max_seq_len: u32 — Maximum sequence length supported.
sliding_window: Option<u32> — Sliding window size for sliding window attention layers (None = no sliding window). Only applies to layers marked as using sliding window attention.
num_sliding_layers: Option<u32> — Number of layers using sliding window attention (the rest use full attention). If not specified, defaults to 0 (all layers use full attention).
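kv_cache_bytes_per_token: u64 — KV cache size per token (in bytes). Both formulas below include the num_layers factor, so this is the total across all layers rather than a per-layer figure. For GQA: 2 * num_kv_heads * head_dim * bytes_per_param * num_layers. For MHA: 2 * num_heads * head_dim * bytes_per_param * num_layers.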
Implementations§
Source§impl ModelConfig
impl ModelConfig
Sourcepub fn active_parameters(&self) -> u64
pub fn active_parameters(&self) -> u64
Get the number of active parameters (defaults to total parameters for dense models)
Sourcepub fn compute_kv_cache_size(&mut self, bytes_per_param: u32)
pub fn compute_kv_cache_size(&mut self, bytes_per_param: u32)
Calculate and set the KV cache size per token. For models with sliding window attention, this calculates an average based on typical usage.
Sourcepub fn with_kv_cache_size(self, bytes_per_param: u32) -> Self
pub fn with_kv_cache_size(self, bytes_per_param: u32) -> Self
Initialize with KV cache size pre-computed
Sourcepub fn kv_cache_size_for_sequence(&self, seq_len: u32) -> u64
pub fn kv_cache_size_for_sequence(&self, seq_len: u32) -> u64
Calculate total KV cache size for a sequence, accounting for sliding window
Trait Implementations§
Source§impl Clone for ModelConfig
impl Clone for ModelConfig
Source§fn clone(&self) -> ModelConfig
fn clone(&self) -> ModelConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more