use super::types::{BlockWeights, GpuModelConfig};
use crate::error::Result;
/// Fully materialized f32 weights read out of a GGUF file, grouped so they
/// can be handed to the GPU upload path in one piece.
pub(super) struct GgufWeights {
// Validated model hyperparameters derived from the GGUF metadata.
pub config: GpuModelConfig,
// Token embedding table (`token_embd.weight`).
pub embedding_weights: Vec<f32>,
// Per-layer attention/FFN weights, one entry per transformer block.
pub block_weights: Vec<BlockWeights>,
// Final normalization scale (`output_norm.weight`).
pub final_norm_weight: Vec<f32>,
// Final normalization bias; filled with zeros when the tensor is absent.
pub final_norm_bias: Vec<f32>,
// LM head projection (`output.weight`).
pub lm_head_weight: Vec<f32>,
// LM head bias; filled with zeros when the tensor is absent.
pub lm_head_bias: Vec<f32>,
}
/// Reads a complete set of model weights from a memory-mapped GGUF file.
///
/// Validates the GGUF metadata into a `GpuModelConfig`, then loads the token
/// embedding, every transformer block, the final norm, and the LM head.
/// Optional bias tensors (`output_norm.bias`, `output.bias`) fall back to
/// zero vectors when absent. When architecture constraints are present in
/// the metadata, the loaded blocks are checked for completeness before the
/// weights are returned.
///
/// # Errors
/// Propagates config-validation failures and any error loading a required
/// tensor; returns `UnsupportedOperation` when a block is incomplete for
/// the declared architecture.
pub(super) fn load_weights_from_gguf(
    mapped: &crate::gguf::MappedGGUFModel,
) -> Result<GgufWeights> {
    use crate::gguf::ValidatedModelConfig;
    let validated = ValidatedModelConfig::from_gguf(&mapped.model)?.into_inner();
    // Fields not described by plain GGUF metadata (linear attention, MoE,
    // per-layer types) stay unset here.
    let config = GpuModelConfig {
        vocab_size: validated.vocab_size,
        hidden_dim: validated.hidden_dim,
        num_heads: validated.num_heads,
        num_kv_heads: validated.num_kv_heads,
        num_layers: validated.num_layers,
        intermediate_dim: validated.intermediate_dim,
        eps: validated.eps,
        rope_theta: validated.rope_theta,
        explicit_head_dim: validated.explicit_head_dim,
        layer_types: None,
        linear_key_head_dim: None,
        linear_value_head_dim: None,
        linear_num_key_heads: None,
        linear_num_value_heads: None,
        linear_conv_kernel_dim: None,
        constraints: Some(validated.constraints),
        num_experts: None,
        num_experts_per_tok: None,
        expert_intermediate_size: None,
    };

    let data = mapped.data();
    let embedding_weights = mapped.model.get_tensor_f32("token_embd.weight", data)?;
    let block_weights = load_block_weights(mapped, &config, data)?;
    if let Some(ref constraints) = config.constraints {
        validate_block_completeness(&block_weights, constraints, &config)?;
    }

    let final_norm_weight = mapped.model.get_tensor_f32("output_norm.weight", data)?;
    let final_norm_bias = match mapped.model.get_tensor_f32("output_norm.bias", data) {
        Ok(bias) => bias,
        Err(_) => vec![0.0f32; config.hidden_dim],
    };
    let lm_head_weight = mapped.model.get_tensor_f32("output.weight", data)?;
    let lm_head_bias = match mapped.model.get_tensor_f32("output.bias", data) {
        Ok(bias) => bias,
        Err(_) => vec![0.0f32; config.vocab_size],
    };

    Ok(GgufWeights {
        config,
        embedding_weights,
        block_weights,
        final_norm_weight,
        final_norm_bias,
        lm_head_weight,
        lm_head_bias,
    })
}
/// Loads the attention and feed-forward weights for all `config.num_layers`
/// transformer blocks.
///
/// Required tensors (attention norm, QKV/output projections, FFN up/down
/// projections) propagate load errors via `?`. Optional tensors fall back to
/// identity defaults: zeros for bias vectors, ones for the FFN norm scale.
/// The FFN gate projection is genuinely optional and is kept as `Option`.
fn load_block_weights(
    mapped: &crate::gguf::MappedGGUFModel,
    config: &GpuModelConfig,
    data: &[u8],
) -> Result<Vec<BlockWeights>> {
    // Loads a tensor that may legitimately be absent, substituting a
    // constant-filled vector of the given length as an identity default.
    let load_or_fill = |name: String, fill: f32, len: usize| {
        mapped
            .model
            .get_tensor_f32(&name, data)
            .unwrap_or_else(|_| vec![fill; len])
    };
    let hidden = config.hidden_dim;

    let mut blocks = Vec::with_capacity(config.num_layers);
    for layer_idx in 0..config.num_layers {
        let prefix = format!("blk.{}", layer_idx);

        let attn_norm_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_norm.weight", prefix), data)?;
        let attn_norm_bias = load_or_fill(format!("{}.attn_norm.bias", prefix), 0.0, hidden);
        let (qkv_weight, qkv_bias) = load_qkv_weights(mapped, config, data, &prefix)?;
        let out_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_output.weight", prefix), data)?;
        let out_bias = load_or_fill(format!("{}.attn_output.bias", prefix), 0.0, hidden);

        // Norm scale defaults to 1.0 (identity) rather than 0.0 when absent.
        let ffn_norm_weight = load_or_fill(format!("{}.ffn_norm.weight", prefix), 1.0, hidden);
        let ffn_norm_bias = load_or_fill(format!("{}.ffn_norm.bias", prefix), 0.0, hidden);
        let ffn_fc1_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.ffn_up.weight", prefix), data)?;
        let ffn_fc1_bias = load_or_fill(
            format!("{}.ffn_up.bias", prefix),
            0.0,
            config.intermediate_dim,
        );
        let ffn_fc2_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.ffn_down.weight", prefix), data)?;
        let ffn_fc2_bias = load_or_fill(format!("{}.ffn_down.bias", prefix), 0.0, hidden);
        // Only present for gated FFN variants; absence is not an error.
        let ffn_gate_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.ffn_gate.weight", prefix), data)
            .ok();

        blocks.push(BlockWeights {
            attn_norm_weight,
            attn_norm_bias,
            qkv_weight,
            qkv_bias,
            out_weight,
            out_bias,
            ffn_norm_weight,
            ffn_norm_bias,
            ffn_fc1_weight,
            ffn_fc1_bias,
            ffn_fc2_weight,
            ffn_fc2_bias,
            ffn_gate_weight,
            linear_attn: None,
            moe_experts: None,
        });
    }
    Ok(blocks)
}
/// Loads the QKV projection weight and bias for one transformer layer.
///
/// Prefers a fused `{prefix}.attn_qkv.weight` tensor; otherwise concatenates
/// the separate Q/K/V tensors in Q, K, V order. Missing bias tensors default
/// to zero vectors sized to match the corresponding projection rows.
///
/// Row counts: Q is `num_heads * head_dim` wide and K/V are
/// `num_kv_heads * head_dim` wide, so for grouped-query attention (or an
/// explicit head_dim) the combined bias length is `q_dim + 2 * kv_dim` —
/// NOT `3 * hidden_dim`, which the previous defaults incorrectly assumed.
///
/// # Errors
/// Propagates tensor-load failures for the required weight tensors.
fn load_qkv_weights(
    mapped: &crate::gguf::MappedGGUFModel,
    config: &GpuModelConfig,
    data: &[u8],
    prefix: &str,
) -> Result<(Vec<f32>, Vec<f32>)> {
    let head_dim = config.head_dim();
    // Q projection rows; differs from hidden_dim when explicit_head_dim is set.
    let q_dim = config.num_heads * head_dim;
    // K/V projection rows; smaller than q_dim under grouped-query attention.
    let kv_dim = config.num_kv_heads * head_dim;
    let total_bias_dim = q_dim + 2 * kv_dim;

    if let Ok(fused_qkv) = mapped
        .model
        .get_tensor_f32(&format!("{}.attn_qkv.weight", prefix), data)
    {
        // Fix: the zero-bias default must match the fused output width
        // (q_dim + 2*kv_dim); `3 * hidden_dim` over/under-sized it for
        // GQA models and explicit head dims.
        let bias = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_qkv.bias", prefix), data)
            .unwrap_or_else(|_| vec![0.0f32; total_bias_dim]);
        Ok((fused_qkv, bias))
    } else {
        let q_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_q.weight", prefix), data)?;
        let k_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_k.weight", prefix), data)?;
        let v_weight = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_v.weight", prefix), data)?;
        let mut qkv_weight = Vec::with_capacity(q_weight.len() + k_weight.len() + v_weight.len());
        qkv_weight.extend_from_slice(&q_weight);
        qkv_weight.extend_from_slice(&k_weight);
        qkv_weight.extend_from_slice(&v_weight);

        // Fix: the Q bias default is sized to the Q projection rows (q_dim),
        // not hidden_dim, keeping it consistent with the K/V defaults below.
        let q_bias = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_q.bias", prefix), data)
            .unwrap_or_else(|_| vec![0.0f32; q_dim]);
        let k_bias = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_k.bias", prefix), data)
            .unwrap_or_else(|_| vec![0.0f32; kv_dim]);
        let v_bias = mapped
            .model
            .get_tensor_f32(&format!("{}.attn_v.bias", prefix), data)
            .unwrap_or_else(|_| vec![0.0f32; kv_dim]);
        let mut qkv_bias = Vec::with_capacity(total_bias_dim);
        qkv_bias.extend_from_slice(&q_bias);
        qkv_bias.extend_from_slice(&k_bias);
        qkv_bias.extend_from_slice(&v_bias);
        Ok((qkv_weight, qkv_bias))
    }
}
/// Verifies that every loaded block contains the weights required by the
/// declared architecture, erroring on the first missing role.
///
/// # Errors
/// Returns `UnsupportedOperation` naming the offending layer and role.
fn validate_block_completeness(
    blocks: &[BlockWeights],
    constraints: &crate::gguf::ArchConstraints,
    config: &GpuModelConfig,
) -> Result<()> {
    use crate::arch_requirements::{required_roles, WeightRole};
    let roles = required_roles(constraints);
    for (layer_idx, block) in blocks.iter().enumerate() {
        // Maps each role onto the loaded tensor that must back it; `true`
        // means the requirement is unmet for this block.
        let unmet = |role: WeightRole| -> bool {
            match role {
                WeightRole::FfnGate => block.ffn_gate_weight.is_none(),
                WeightRole::AttnNorm => block.attn_norm_weight.is_empty(),
                WeightRole::FfnNorm => block.ffn_norm_weight.is_empty(),
                WeightRole::QProj | WeightRole::KProj | WeightRole::VProj => {
                    block.qkv_weight.is_empty()
                },
                WeightRole::OProj => block.out_weight.is_empty(),
                WeightRole::FfnUp => block.ffn_fc1_weight.is_empty(),
                WeightRole::FfnDown => block.ffn_fc2_weight.is_empty(),
                // QK-norm tensors are never loaded into `BlockWeights`, so
                // architectures that require them are rejected outright.
                WeightRole::AttnQNorm | WeightRole::AttnKNorm => {
                    constraints.has_qk_norm
                },
                // Bias roles are always satisfied: missing bias tensors
                // were replaced with zero vectors at load time.
                WeightRole::AttnQBias | WeightRole::AttnKBias | WeightRole::AttnVBias => {
                    false
                },
            }
        };
        if let Some(&role) = roles.iter().find(|&&r| unmet(r)) {
            return Err(crate::error::RealizarError::UnsupportedOperation {
                operation: "validate_block_completeness".to_string(),
                reason: format!(
                    "GH-279: Layer {} missing required weight '{}' for architecture \
                    (has_qk_norm={}, has_bias={}, {} layers, hidden={})",
                    layer_idx,
                    role.field_name(),
                    constraints.has_qk_norm,
                    constraints.has_bias,
                    config.num_layers,
                    config.hidden_dim,
                ),
            });
        }
    }
    Ok(())
}