pub struct ModelConfig {
pub vocab_size: usize,
pub hidden_size: usize,
pub intermediate_size: usize,
pub num_layers: usize,
pub num_heads: usize,
pub num_kv_heads: usize,
pub head_dim: usize,
pub max_seq_len: usize,
pub norm_eps: f32,
pub rope_config: RopeConfig,
pub use_parallel_residual: bool,
pub hidden_act: ActivationType,
pub attention_bias: bool,
pub mlp_bias: bool,
pub tie_word_embeddings: bool,
pub num_experts: usize,
pub num_experts_per_token: usize,
pub expert_intermediate_size: usize,
pub key_length: usize,
pub value_length: usize,
pub ssm_d_inner: usize,
pub ssm_d_state: usize,
pub ssm_n_group: usize,
pub ssm_dt_rank: usize,
pub ssm_conv_kernel: usize,
pub attn_logit_softcap: f32,
pub final_logit_softcap: f32,
pub sliding_window: usize,
pub has_combined_qkv: bool,
pub uses_layer_norm: bool,
pub uses_gelu: bool,
pub has_ffn_gate: bool,
pub attention_layer_configs: Option<Vec<AttentionLayerConfig>>,
pub kv_source_layer: Option<Vec<usize>>,
}
Full model configuration.
Fields§
vocab_size: usizeVocabulary size
hidden_size: usizeHidden dimension (embedding size)
intermediate_size: usizeIntermediate size (FFN dimension, typically 4 * hidden_size or computed)
num_layers: usizeNumber of transformer layers
num_heads: usizeNumber of attention heads
num_kv_heads: usizeNumber of key-value heads (for GQA/MQA)
head_dim: usizeDimension per head
max_seq_len: usizeMaximum sequence length
norm_eps: f32RMS normalization epsilon
rope_config: RopeConfigRoPE configuration
use_parallel_residual: boolWhether to use parallel attention (compute QKV in parallel)
hidden_act: ActivationTypeActivation function type
attention_bias: boolWhether there’s a bias in attention projections
mlp_bias: boolWhether there’s a bias in MLP layers
tie_word_embeddings: boolTie word embeddings with output projection
num_experts: usizeNumber of MoE experts (0 = dense model)
num_experts_per_token: usizeNumber of experts activated per token
expert_intermediate_size: usizeExpert FFN intermediate dimension (may differ from dense intermediate_size)
key_length: usizePer-head key dimension (defaults to head_dim if not specified)
value_length: usizePer-head value dimension (defaults to head_dim if not specified)
ssm_d_inner: usizeSSM/DeltaNet inner dimension (0 = no SSM layers)
ssm_d_state: usizeSSM state dimension (per-head key dim for delta-net)
ssm_n_group: usizeSSM group count (number of key heads in delta-net)
ssm_dt_rank: usizeSSM time step rank (number of value heads in delta-net)
ssm_conv_kernel: usizeSSM convolution kernel size
attn_logit_softcap: f32Attention logit soft-capping value (Gemma2: 50.0, 0.0 = disabled)
final_logit_softcap: f32Final logit soft-capping value (Gemma2: 30.0, 0.0 = disabled)
sliding_window: usizeSliding window attention size (0 = disabled)
has_combined_qkv: boolWhether this architecture uses combined QKV tensor
uses_layer_norm: boolWhether this architecture uses LayerNorm instead of RMSNorm
uses_gelu: boolWhether this architecture uses GELU activation
has_ffn_gate: boolWhether this architecture has a gate projection in FFN
attention_layer_configs: Option<Vec<AttentionLayerConfig>>Per-layer attention configs for architectures with heterogeneous attention (e.g., Gemma 4). None means all layers use uniform config.
kv_source_layer: Option<Vec<usize>>Maps layer index to physical KV cache slot. Identity by default. For KV shared layers, multiple indices map to the same slot.
Implementations§
Source§impl ModelConfig
impl ModelConfig
Sourcepub fn num_queries_per_kv(&self) -> usize
pub fn num_queries_per_kv(&self) -> usize
Get the number of query heads per KV head
Sourcepub fn build_attention_layer_configs(
num_layers: usize,
pattern_period: usize,
sliding_head_dim: usize,
sliding_kv_heads: usize,
sliding_rope_freq_base: f32,
sliding_window: usize,
global_head_dim: usize,
global_kv_heads: usize,
global_rope_freq_base: f32,
global_rope_dims: usize,
) -> Vec<AttentionLayerConfig>
pub fn build_attention_layer_configs( num_layers: usize, pattern_period: usize, sliding_head_dim: usize, sliding_kv_heads: usize, sliding_rope_freq_base: f32, sliding_window: usize, global_head_dim: usize, global_kv_heads: usize, global_rope_freq_base: f32, global_rope_dims: usize, ) -> Vec<AttentionLayerConfig>
Build attention layer configs for a sliding/global pattern.
pattern_period layers form one cycle, where the last layer is Global
and the rest are Sliding. E.g., period=6 gives 5 sliding + 1 global.
Sourcepub fn build_attention_layer_configs_from_pattern(
is_swa: &[bool],
sliding_head_dim: usize,
sliding_kv_heads: usize,
sliding_rope_freq_base: f32,
sliding_rope_dims: usize,
sliding_window: usize,
global_head_dim: usize,
global_kv_heads: usize,
global_rope_freq_base: f32,
global_rope_dims: usize,
) -> Vec<AttentionLayerConfig>
pub fn build_attention_layer_configs_from_pattern( is_swa: &[bool], sliding_head_dim: usize, sliding_kv_heads: usize, sliding_rope_freq_base: f32, sliding_rope_dims: usize, sliding_window: usize, global_head_dim: usize, global_kv_heads: usize, global_rope_freq_base: f32, global_rope_dims: usize, ) -> Vec<AttentionLayerConfig>
Build attention layer configs from a per-layer boolean SWA pattern.
is_swa[i] is true if layer i uses sliding-window attention, false
for global attention. This matches the sliding_window_pattern array
stored in Gemma 4 GGUF files.
Sourcepub fn build_kv_source_mapping(
num_layers: usize,
shared_layers: usize,
layer_configs: &[AttentionLayerConfig],
) -> Vec<usize>
pub fn build_kv_source_mapping( num_layers: usize, shared_layers: usize, layer_configs: &[AttentionLayerConfig], ) -> Vec<usize>
Build KV source layer mapping for shared KV cache.
The last shared_layers layers reuse cached K/V from earlier layers
instead of projecting their own. The mapping is TYPE-SPECIFIC: shared
SWA layers map to the last KV-owning SWA layer, shared global layers
map to the last KV-owning global layer.
Requires layer_configs to determine each layer’s type.
Trait Implementations§
Source§impl Clone for ModelConfig
impl Clone for ModelConfig
Source§fn clone(&self) -> ModelConfig
fn clone(&self) -> ModelConfig
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for ModelConfig
impl Debug for ModelConfig
Source§impl Default for ModelConfig
impl Default for ModelConfig
Source§impl<'de> Deserialize<'de> for ModelConfig
impl<'de> Deserialize<'de> for ModelConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for ModelConfig
impl RefUnwindSafe for ModelConfig
impl Send for ModelConfig
impl Sync for ModelConfig
impl Unpin for ModelConfig
impl UnsafeUnpin for ModelConfig
impl UnwindSafe for ModelConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,
impl<A, B, T> HttpServerConnExec<A, B> for T where
B: Body,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more