impl SafetensorsToAprConverter {
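/// Convert a single-file SafeTensors model on disk into a validated APR transformer.
///
/// The sibling `config.json` next to the weights is required; conversion fails with
/// `UnsupportedOperation` when it is missing.
///
/// Illustrative call (the path is a placeholder; not compiled as a doctest):
///
/// ```ignore
/// use std::path::Path;
///
/// let model = SafetensorsToAprConverter::convert(Path::new("model.safetensors"))
///     .expect("SafeTensors -> APR conversion failed");
/// ```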
pub fn convert(model_path: &Path) -> Result<ValidatedAprTransformer> {
let st_model = MappedSafeTensorsModel::load(model_path)?;
let config = SafetensorsConfig::load_from_sibling(model_path).ok_or_else(|| {
RealizarError::UnsupportedOperation {
operation: "safetensors_convert".to_string(),
reason: "config.json not found (required for SafeTensors inference)".to_string(),
}
})?;
Self::convert_from_source(&st_model, &config)
}
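/// Convert a sharded (multi-file) SafeTensors model using an already-parsed config.
/// Not available on `wasm32` targets.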
#[cfg(not(target_arch = "wasm32"))]
pub fn convert_sharded(
sharded: &ShardedSafeTensorsModel,
config: &SafetensorsConfig,
) -> Result<ValidatedAprTransformer> {
Self::convert_from_source(sharded, config)
}
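/// Shared conversion path for mapped and sharded sources: build the APR config,
/// load the token embedding, output norm, LM head, and all layers, then validate the result.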
fn convert_from_source<S: TensorSource>(
source: &S,
config: &SafetensorsConfig,
) -> Result<ValidatedAprTransformer> {
let apr_config = Self::build_apr_config(config)?;
Self::log_phase2_warning(config);
Self::log_hybrid_attention_info(config);
let model_prefix = Self::detect_model_prefix(source);
let token_embedding = Self::get_tensor_with_fallback_generic(
source,
&format!("{model_prefix}.embed_tokens.weight"),
"token_embd.weight",
)?;
let output_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{model_prefix}.norm.weight"),
"output_norm.weight",
)?;
let lm_head_weight = Self::resolve_lm_head_weight(
source,
config,
&token_embedding,
apr_config.vocab_size,
apr_config.hidden_dim,
)?;
let layers = Self::build_layers(source, config, &apr_config, &model_prefix)?;
let transformer = AprTransformer {
config: apr_config,
token_embedding,
layers,
output_norm_weight,
output_norm_bias: None,
lm_head_weight,
lm_head_bias: None,
q4k_layers: None,
lm_head_weight_q6k: None,
lm_head_weight_q4k: None,
};
ValidatedAprTransformer::validate(transformer).map_err(Into::into)
}
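/// Build the APR transformer config from `config.json` fields, erroring on any missing
/// required field and applying defaults (e.g. `rope_theta`, `eps`) for optional ones.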
fn build_apr_config(config: &SafetensorsConfig) -> Result<AprTransformerConfig> {
let hidden_dim = Self::required_config_field(config.hidden_size, "hidden_size")?;
let num_layers = Self::required_config_field(config.num_hidden_layers, "num_hidden_layers")?;
let num_heads =
Self::required_config_field(config.num_attention_heads, "num_attention_heads")?;
let vocab_size = Self::required_config_field(config.vocab_size, "vocab_size")?;
let architecture = config.architecture();
let rope_theta = config
.rope_theta
.unwrap_or_else(|| crate::gguf::default_rope_theta_for_architecture(&architecture));
Ok(AprTransformerConfig {
architecture,
hidden_dim,
num_layers,
num_heads,
num_kv_heads: config.num_kv_heads(),
vocab_size,
intermediate_dim: config.intermediate_size.unwrap_or(hidden_dim * 4),
context_length: config.max_position_embeddings.unwrap_or(0),
rope_theta,
eps: config.rms_norm_eps.unwrap_or(1e-6),
eos_token_id: config.eos_token_id,
explicit_head_dim: config.head_dim,
layer_types: config.layer_types.clone(),
linear_key_head_dim: config.linear_key_head_dim,
linear_value_head_dim: config.linear_value_head_dim,
linear_num_key_heads: config.linear_num_key_heads,
linear_num_value_heads: config.linear_num_value_heads,
linear_conv_kernel_dim: config.linear_conv_kernel_dim,
num_experts: config.num_experts,
num_experts_per_tok: config.num_experts_per_tok,
expert_intermediate_size: config.moe_intermediate_size,
})
}
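/// Unwrap a required `config.json` field or return a `FormatError` naming it.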
fn required_config_field(value: Option<usize>, field: &str) -> Result<usize> {
value.ok_or_else(|| RealizarError::FormatError {
reason: format!("config.json missing {field}"),
})
}
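/// Emit a non-fatal warning when the SafeTensors config fails Phase-2 validation.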
fn log_phase2_warning(config: &SafetensorsConfig) {
if let Err(e) = crate::gguf::ValidatedModelConfig::from_safetensors_config(config) {
eprintln!(
"[Phase2-WARN] SafeTensors config validation: {e} — proceeding with conversion"
);
}
}
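/// Log layer statistics when the config describes a hybrid (linear + full) attention model.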
fn log_hybrid_attention_info(config: &SafetensorsConfig) {
if !config.is_hybrid_attention() {
return;
}
let layer_count = config.layer_types.as_ref().map_or(0, Vec::len);
let linear_count = config.layer_types.as_ref().map_or(0, |t| {
t.iter()
.filter(|l| *l == "linear" || *l == "linear_attention")
.count()
});
eprintln!(
"[GH-278] Hybrid attention model detected: {}/{} linear layers, head_dim={:?}",
linear_count, layer_count, config.head_dim,
);
}
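/// Resolve the LM head weight: reuse the token embedding when embeddings are tied,
/// otherwise load `lm_head.weight` (or `output.weight`), falling back to the embedding.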
fn resolve_lm_head_weight<S: TensorSource>(
source: &S,
config: &SafetensorsConfig,
token_embedding: &[f32],
vocab_size: usize,
hidden_dim: usize,
) -> Result<Vec<f32>> {
if config.tie_word_embeddings.unwrap_or(false) {
return Ok(Self::transpose_weight(token_embedding, vocab_size, hidden_dim));
}
if Self::has_tensor_with_fallback_generic(source, "lm_head.weight", "output.weight") {
let raw =
Self::get_tensor_with_fallback_generic(source, "lm_head.weight", "output.weight")?;
return Ok(Self::transpose_weight(&raw, vocab_size, hidden_dim));
}
Ok(Self::transpose_weight(token_embedding, vocab_size, hidden_dim))
}
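/// Build all transformer layers, additionally loading MoE weights when the config declares experts.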
fn build_layers<S: TensorSource>(
source: &S,
config: &SafetensorsConfig,
apr_config: &AprTransformerConfig,
model_prefix: &str,
) -> Result<Vec<AprTransformerLayer>> {
let is_moe = config.num_experts.is_some();
let mut layers = Vec::with_capacity(apr_config.num_layers);
for i in 0..apr_config.num_layers {
let mut layer = Self::extract_single_layer(source, config, apr_config, i, model_prefix)?;
if is_moe {
Self::load_moe_weights(
source,
i,
model_prefix,
config,
apr_config.hidden_dim,
&mut layer,
)?;
}
layers.push(layer);
}
Ok(layers)
}
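/// Extract one layer, dispatching to the linear-attention path when `layer_types` marks it as linear.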
fn extract_single_layer<S: TensorSource>(
source: &S,
config: &SafetensorsConfig,
apr_config: &AprTransformerConfig,
layer_idx: usize,
model_prefix: &str,
) -> Result<AprTransformerLayer> {
if Self::is_linear_attention_layer(config, layer_idx) {
Self::extract_linear_layer_generic(
source,
layer_idx,
apr_config.hidden_dim,
apr_config.intermediate_dim,
config,
model_prefix,
)
} else {
Self::extract_layer_generic_with_prefix(
source,
layer_idx,
apr_config.hidden_dim,
apr_config.num_heads,
apr_config.num_kv_heads,
apr_config.intermediate_dim,
model_prefix,
)
}
}
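/// True if `layer_types[layer_idx]` is `"linear"` or `"linear_attention"`.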
fn is_linear_attention_layer(config: &SafetensorsConfig, layer_idx: usize) -> bool {
config
.layer_types
.as_ref()
.and_then(|lt| lt.get(layer_idx))
.is_some_and(|t| t == "linear" || t == "linear_attention")
}
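/// Mapped-model convenience wrapper that delegates to `extract_layer_generic`.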
#[allow(dead_code)]
fn extract_layer(
st_model: &MappedSafeTensorsModel,
layer_idx: usize,
hidden_dim: usize,
num_heads: usize,
num_kv_heads: usize,
intermediate_dim: usize,
) -> Result<AprTransformerLayer> {
Self::extract_layer_generic(
st_model,
layer_idx,
hidden_dim,
num_heads,
num_kv_heads,
intermediate_dim,
)
}
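/// Extract a standard full-attention layer using the fixed `model.layers.{i}` / `blk.{i}`
/// name prefixes; dense FFN tensors are required on this path.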
fn extract_layer_generic<S: TensorSource>(
source: &S,
layer_idx: usize,
hidden_dim: usize,
num_heads: usize,
num_kv_heads: usize,
intermediate_dim: usize,
) -> Result<AprTransformerLayer> {
let hf_prefix = format!("model.layers.{layer_idx}");
let gguf_prefix = format!("blk.{layer_idx}");
let attn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.input_layernorm.weight"),
&format!("{gguf_prefix}.attn_norm.weight"),
)?;
let q_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.q_proj.weight"),
&format!("{gguf_prefix}.attn_q.weight"),
)?;
let k_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.k_proj.weight"),
&format!("{gguf_prefix}.attn_k.weight"),
)?;
let v_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.v_proj.weight"),
&format!("{gguf_prefix}.attn_v.weight"),
)?;
let head_dim = hidden_dim / num_heads;
let kv_dim = head_dim * num_kv_heads;
let qkv_weight =
Self::concat_qkv_transposed(&q_weight, &k_weight, &v_weight, hidden_dim, kv_dim);
let qkv_bias = Self::try_concat_qkv_bias_dual_generic(
source,
&hf_prefix,
&gguf_prefix,
hidden_dim,
kv_dim,
);
let attn_output_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.o_proj.weight"),
&format!("{gguf_prefix}.attn_output.weight"),
)?;
let attn_output_weight = Self::transpose_weight(&attn_output_raw, hidden_dim, hidden_dim);
let ffn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.post_attention_layernorm.weight"),
&format!("{gguf_prefix}.ffn_norm.weight"),
)?;
let ffn_gate_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.gate_proj.weight"),
&format!("{gguf_prefix}.ffn_gate.weight"),
)?;
let ffn_gate_weight = Self::transpose_weight(&ffn_gate_raw, intermediate_dim, hidden_dim);
let ffn_up_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.up_proj.weight"),
&format!("{gguf_prefix}.ffn_up.weight"),
)?;
let ffn_up_weight = Self::transpose_weight(&ffn_up_raw, intermediate_dim, hidden_dim);
let ffn_down_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.down_proj.weight"),
&format!("{gguf_prefix}.ffn_down.weight"),
)?;
let ffn_down_weight = Self::transpose_weight(&ffn_down_raw, hidden_dim, intermediate_dim);
Ok(AprTransformerLayer {
attn_norm_weight,
attn_norm_bias: None,
qkv_weight,
qkv_bias,
attn_output_weight,
attn_output_bias: None,
ffn_gate_weight: Some(ffn_gate_weight),
ffn_gate_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_norm_weight: Some(ffn_norm_weight),
ffn_norm_bias: None,
attn_q_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.q_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_q_norm.weight")))
.ok(),
attn_k_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.k_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_k_norm.weight")))
.ok(),
linear_attn_z_weight: None,
linear_attn_b_weight: None,
linear_attn_a_weight: None,
linear_attn_conv1d_weight: None,
linear_attn_a_log: None,
linear_attn_dt_bias: None,
linear_attn_norm_weight: None,
moe_gate_weight: None,
moe_expert_gate_up: None,
moe_expert_down: None,
moe_shared_gate: None,
moe_shared_up: None,
moe_shared_down: None,
moe_shared_expert_gate_weight: None,
})
}
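/// Extract a linear-attention layer, handling both the combined `in_proj_qkvz` layout
/// (under `self_attn`) and the separate `in_proj_qkv`/`in_proj_z`/`in_proj_b`/`in_proj_a`
/// layout (under `linear_attn`). Layers without a dense MLP get zero-filled FFN placeholders.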
fn extract_linear_layer_generic<S: TensorSource>(
source: &S,
layer_idx: usize,
hidden_dim: usize,
intermediate_dim: usize,
config: &SafetensorsConfig,
model_prefix: &str,
) -> Result<AprTransformerLayer> {
let hf_prefix = format!("{model_prefix}.layers.{layer_idx}");
let attn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.input_layernorm.weight"),
&format!("blk.{layer_idx}.attn_norm.weight"),
)?;
let key_head_dim = config.linear_key_head_dim.unwrap_or(128);
let value_head_dim = config.linear_value_head_dim.unwrap_or(128);
let num_key_heads = config.linear_num_key_heads.unwrap_or(16);
let num_value_heads = config.linear_num_value_heads.unwrap_or(32);
let key_dim = num_key_heads * key_head_dim;
let value_dim = num_value_heads * value_head_dim;
let combined_qkvz_name = format!("{hf_prefix}.self_attn.in_proj_qkvz.weight");
let separate_qkv_name = format!("{hf_prefix}.linear_attn.in_proj_qkv.weight");
let (qkv_weight, z_weight, b_weight, a_weight, attn_sub) =
if source.has_tensor(&combined_qkvz_name) {
let in_proj_qkvz = source.get_tensor_auto(&combined_qkvz_name)?;
let qkvz_out_dim = 2 * key_dim + 2 * value_dim;
let expected_qkvz = qkvz_out_dim * hidden_dim;
if in_proj_qkvz.len() != expected_qkvz {
return Err(RealizarError::FormatError {
reason: format!(
"GH-278: in_proj_qkvz size mismatch at layer {layer_idx}: \
expected {expected_qkvz}, got {}",
in_proj_qkvz.len()
),
});
}
let q_end = key_dim * hidden_dim;
let k_end = q_end + key_dim * hidden_dim;
let v_end = k_end + value_dim * hidden_dim;
let qkv = Self::concat_qkv(&in_proj_qkvz[..q_end], &in_proj_qkvz[q_end..k_end], &in_proj_qkvz[k_end..v_end]);
let z = in_proj_qkvz[v_end..].to_vec();
let in_proj_ba = source.get_tensor_auto(&format!("{hf_prefix}.self_attn.in_proj_ba.weight"))?;
let ba_split = num_value_heads * hidden_dim;
let b = in_proj_ba[..ba_split].to_vec();
let a = in_proj_ba[ba_split..].to_vec();
(qkv, z, b, a, "self_attn")
} else {
let in_proj_qkv = source.get_tensor_auto(&separate_qkv_name)?;
let qkv_out_dim = 2 * key_dim + value_dim;
let expected_qkv = qkv_out_dim * hidden_dim;
if in_proj_qkv.len() != expected_qkv {
return Err(RealizarError::FormatError {
reason: format!(
"ALB-010: in_proj_qkv size mismatch at layer {layer_idx}: \
expected {expected_qkv}, got {}",
in_proj_qkv.len()
),
});
}
let qkv = in_proj_qkv;
let z = source.get_tensor_auto(&format!("{hf_prefix}.linear_attn.in_proj_z.weight"))?;
let b = source.get_tensor_auto(&format!("{hf_prefix}.linear_attn.in_proj_b.weight"))?;
let a = source.get_tensor_auto(&format!("{hf_prefix}.linear_attn.in_proj_a.weight"))?;
(qkv, z, b, a, "linear_attn")
};
let out_proj_raw = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.out_proj.weight"))?;
let attn_output_weight = Self::transpose_weight(&out_proj_raw, hidden_dim, value_dim);
let conv1d_weight = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.conv1d.weight"))?;
let a_log = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.A_log"))?;
let dt_bias = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.dt_bias"))?;
let norm_weight = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.norm.weight"))?;
let ffn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.post_attention_layernorm.weight"),
&format!("blk.{layer_idx}.ffn_norm.weight"),
)?;
let has_dense_ffn = source.has_tensor(&format!("{hf_prefix}.mlp.gate_proj.weight"))
|| source.has_tensor(&format!("blk.{layer_idx}.ffn_gate.weight"));
let (ffn_gate_weight, ffn_up_weight, ffn_down_weight) = if has_dense_ffn {
let gate_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.gate_proj.weight"),
&format!("blk.{layer_idx}.ffn_gate.weight"),
)?;
let up_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.up_proj.weight"),
&format!("blk.{layer_idx}.ffn_up.weight"),
)?;
let down_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.down_proj.weight"),
&format!("blk.{layer_idx}.ffn_down.weight"),
)?;
(
Some(Self::transpose_weight(&gate_raw, intermediate_dim, hidden_dim)),
Self::transpose_weight(&up_raw, intermediate_dim, hidden_dim),
Self::transpose_weight(&down_raw, hidden_dim, intermediate_dim),
)
} else {
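// No dense MLP tensors found for this layer (e.g. MoE layers): use zero-filled FFN placeholders.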
(None, vec![0.0; intermediate_dim * hidden_dim], vec![0.0; hidden_dim * intermediate_dim])
};
Ok(AprTransformerLayer {
attn_norm_weight,
attn_norm_bias: None,
qkv_weight,
qkv_bias: None,
attn_output_weight,
attn_output_bias: None,
ffn_gate_weight,
ffn_gate_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_norm_weight: Some(ffn_norm_weight),
ffn_norm_bias: None,
attn_q_norm_weight: None,
attn_k_norm_weight: None,
linear_attn_z_weight: Some(z_weight),
linear_attn_b_weight: Some(b_weight),
linear_attn_a_weight: Some(a_weight),
linear_attn_conv1d_weight: Some(conv1d_weight),
linear_attn_a_log: Some(a_log),
linear_attn_dt_bias: Some(dt_bias),
linear_attn_norm_weight: Some(norm_weight),
moe_gate_weight: None,
moe_expert_gate_up: None,
moe_expert_down: None,
moe_shared_gate: None,
moe_shared_up: None,
moe_shared_down: None,
moe_shared_expert_gate_weight: None,
})
}
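/// Detect the HF tensor-name prefix: `model.language_model` if any tensor uses it, otherwise `model`.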
fn detect_model_prefix<S: TensorSource>(source: &S) -> String {
let names = source.tensor_names();
for name in &names {
if name.starts_with("model.language_model.") {
return "model.language_model".to_string();
}
}
"model".to_string()
}
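/// Like `extract_layer_generic`, but with a caller-supplied HF prefix and a zero-filled
/// FFN placeholder when the layer has no dense MLP.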
fn extract_layer_generic_with_prefix<S: TensorSource>(
source: &S,
layer_idx: usize,
hidden_dim: usize,
num_heads: usize,
num_kv_heads: usize,
intermediate_dim: usize,
model_prefix: &str,
) -> Result<AprTransformerLayer> {
let hf_prefix = format!("{model_prefix}.layers.{layer_idx}");
let gguf_prefix = format!("blk.{layer_idx}");
let attn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.input_layernorm.weight"),
&format!("{gguf_prefix}.attn_norm.weight"),
)?;
let q_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.q_proj.weight"),
&format!("{gguf_prefix}.attn_q.weight"),
)?;
let k_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.k_proj.weight"),
&format!("{gguf_prefix}.attn_k.weight"),
)?;
let v_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.v_proj.weight"),
&format!("{gguf_prefix}.attn_v.weight"),
)?;
let head_dim = hidden_dim / num_heads;
let kv_dim = head_dim * num_kv_heads;
let qkv_weight =
Self::concat_qkv_transposed(&q_weight, &k_weight, &v_weight, hidden_dim, kv_dim);
let qkv_bias = Self::try_concat_qkv_bias_dual_generic(
source,
&hf_prefix,
&gguf_prefix,
hidden_dim,
kv_dim,
);
let attn_output_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.o_proj.weight"),
&format!("{gguf_prefix}.attn_output.weight"),
)?;
let attn_output_weight = Self::transpose_weight(&attn_output_raw, hidden_dim, hidden_dim);
let ffn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.post_attention_layernorm.weight"),
&format!("{gguf_prefix}.ffn_norm.weight"),
)?;
let has_dense_ffn = source.has_tensor(&format!("{hf_prefix}.mlp.gate_proj.weight"))
|| source.has_tensor(&format!("{gguf_prefix}.ffn_gate.weight"));
let (ffn_gate_weight, ffn_up_weight, ffn_down_weight) = if has_dense_ffn {
let gate_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.gate_proj.weight"),
&format!("{gguf_prefix}.ffn_gate.weight"),
)?;
let up_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.up_proj.weight"),
&format!("{gguf_prefix}.ffn_up.weight"),
)?;
let down_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.down_proj.weight"),
&format!("{gguf_prefix}.ffn_down.weight"),
)?;
(
Some(Self::transpose_weight(&gate_raw, intermediate_dim, hidden_dim)),
Self::transpose_weight(&up_raw, intermediate_dim, hidden_dim),
Self::transpose_weight(&down_raw, hidden_dim, intermediate_dim),
)
} else {
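// No dense MLP tensors found for this layer (e.g. MoE layers): use zero-filled FFN placeholders.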
(None, vec![0.0; intermediate_dim * hidden_dim], vec![0.0; hidden_dim * intermediate_dim])
};
Ok(AprTransformerLayer {
attn_norm_weight,
attn_norm_bias: None,
qkv_weight,
qkv_bias,
attn_output_weight,
attn_output_bias: None,
ffn_gate_weight,
ffn_gate_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_norm_weight: Some(ffn_norm_weight),
ffn_norm_bias: None,
attn_q_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.q_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_q_norm.weight")))
.ok(),
attn_k_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.k_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_k_norm.weight")))
.ok(),
linear_attn_z_weight: None,
linear_attn_b_weight: None,
linear_attn_a_weight: None,
linear_attn_conv1d_weight: None,
linear_attn_a_log: None,
linear_attn_dt_bias: None,
linear_attn_norm_weight: None,
moe_gate_weight: None,
moe_expert_gate_up: None,
moe_expert_down: None,
moe_shared_gate: None,
moe_shared_up: None,
moe_shared_down: None,
moe_shared_expert_gate_weight: None,
})
}
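/// Load MoE tensors (router gate, expert weights, shared-expert FFN and gate) into an existing layer.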
fn load_moe_weights<S: TensorSource>(
source: &S,
layer_idx: usize,
model_prefix: &str,
config: &SafetensorsConfig,
hidden_dim: usize,
layer: &mut AprTransformerLayer,
) -> Result<()> {
let prefix = format!("{model_prefix}.layers.{layer_idx}");
Self::load_moe_router_gate(source, &prefix, layer);
Self::load_moe_expert_tensors(source, &prefix, config, hidden_dim, layer);
Self::load_moe_shared_expert_ffn(source, &prefix, config, layer);
Self::load_moe_shared_expert_gate(source, &prefix, layer);
Ok(())
}
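/// Load the MoE router gate weight if present.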
fn load_moe_router_gate<S: TensorSource>(
source: &S,
prefix: &str,
layer: &mut AprTransformerLayer,
) {
if let Ok(gate) = source.get_tensor_auto(&format!("{prefix}.mlp.gate.weight")) {
layer.moe_gate_weight = Some(gate);
}
}
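/// Load expert weights: prefer the fused `experts.gate_up_proj` / `experts.down_proj`
/// tensors, otherwise pack per-expert tensors into the same layout.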
fn load_moe_expert_tensors<S: TensorSource>(
source: &S,
prefix: &str,
config: &SafetensorsConfig,
hidden_dim: usize,
layer: &mut AprTransformerLayer,
) {
if let Ok(gate_up) = source.get_tensor_auto(&format!("{prefix}.mlp.experts.gate_up_proj")) {
layer.moe_expert_gate_up = Some(gate_up);
if let Ok(down) = source.get_tensor_auto(&format!("{prefix}.mlp.experts.down_proj")) {
layer.moe_expert_down = Some(down);
}
return;
}
if let Some((gate_up_packed, down_packed)) =
Self::pack_per_expert_tensors(source, prefix, config, hidden_dim)
{
layer.moe_expert_gate_up = Some(gate_up_packed);
layer.moe_expert_down = Some(down_packed);
}
}
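/// Pack per-expert `gate_proj`/`up_proj`/`down_proj` tensors into fused gate-up and down buffers.
/// Returns `None` when the config lacks expert sizes or no expert tensors are found.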
fn pack_per_expert_tensors<S: TensorSource>(
source: &S,
prefix: &str,
config: &SafetensorsConfig,
hidden_dim: usize,
) -> Option<(Vec<f32>, Vec<f32>)> {
let num_experts = config.num_experts.unwrap_or(0);
let moe_intermediate = config.moe_intermediate_size.unwrap_or(0);
if num_experts == 0 || moe_intermediate == 0 {
return None;
}
let mut gate_up_packed = Vec::with_capacity(num_experts * 2 * moe_intermediate * hidden_dim);
let mut down_packed = Vec::with_capacity(num_experts * hidden_dim * moe_intermediate);
let mut found_any = false;
for e in 0..num_experts {
let gate = source.get_tensor_auto(&format!("{prefix}.mlp.experts.{e}.gate_proj.weight"));
let up = source.get_tensor_auto(&format!("{prefix}.mlp.experts.{e}.up_proj.weight"));
let down = source.get_tensor_auto(&format!("{prefix}.mlp.experts.{e}.down_proj.weight"));
if let (Ok(gate), Ok(up), Ok(down)) = (gate, up, down) {
found_any = true;
gate_up_packed.extend_from_slice(&gate);
gate_up_packed.extend_from_slice(&up);
down_packed.extend_from_slice(&down);
} else if found_any {
break;
}
}
found_any.then_some((gate_up_packed, down_packed))
}
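/// Load the shared-expert FFN weights when the config declares a shared-expert intermediate size.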
fn load_moe_shared_expert_ffn<S: TensorSource>(
source: &S,
prefix: &str,
config: &SafetensorsConfig,
layer: &mut AprTransformerLayer,
) {
let shared_intermediate = config
.shared_expert_intermediate_size
.or(config.moe_intermediate_size)
.unwrap_or(0);
if shared_intermediate == 0 {
return;
}
if let Ok(g) = source.get_tensor_auto(&format!("{prefix}.mlp.shared_expert.gate_proj.weight"))
{
layer.moe_shared_gate = Some(g);
}
if let Ok(u) = source.get_tensor_auto(&format!("{prefix}.mlp.shared_expert.up_proj.weight"))
{
layer.moe_shared_up = Some(u);
}
if let Ok(d) = source.get_tensor_auto(&format!("{prefix}.mlp.shared_expert.down_proj.weight"))
{
layer.moe_shared_down = Some(d);
}
}
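/// Load the optional shared-expert gating weight.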
fn load_moe_shared_expert_gate<S: TensorSource>(
source: &S,
prefix: &str,
layer: &mut AprTransformerLayer,
) {
if let Ok(sg) = source.get_tensor_auto(&format!("{prefix}.mlp.shared_expert_gate.weight")) {
layer.moe_shared_expert_gate_weight = Some(sg);
}
}
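/// Currently returns the weight unchanged: the dimension arguments are unused and
/// no transposition is performed.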
pub fn transpose_weight(weight: &[f32], _out_dim: usize, _in_dim: usize) -> Vec<f32> {
weight.to_vec()
}
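/// Concatenate the Q, K, and V projection weights into one buffer; the dimension
/// arguments are currently unused.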
pub fn concat_qkv_transposed(
q: &[f32],
k: &[f32],
v: &[f32],
_hidden_dim: usize,
_kv_dim: usize,
) -> Vec<f32> {
let mut qkv = Vec::with_capacity(q.len() + k.len() + v.len());
qkv.extend_from_slice(q);
qkv.extend_from_slice(k);
qkv.extend_from_slice(v);
qkv
}
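/// Concatenate Q, K, and V slices into a single buffer.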
fn concat_qkv(q: &[f32], k: &[f32], v: &[f32]) -> Vec<f32> {
let mut qkv = Vec::with_capacity(q.len() + k.len() + v.len());
qkv.extend_from_slice(q);
qkv.extend_from_slice(k);
qkv.extend_from_slice(v);
qkv
}
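/// Concatenate QKV biases from HF-style names only; returns `None` if any bias is missing.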
#[allow(dead_code)]
fn try_concat_qkv_bias(
st_model: &MappedSafeTensorsModel,
prefix: &str,
hidden_dim: usize,
kv_dim: usize,
) -> Option<Vec<f32>> {
let q_bias = st_model
.get_tensor_auto(&format!("{prefix}.self_attn.q_proj.bias"))
.ok()?;
let k_bias = st_model
.get_tensor_auto(&format!("{prefix}.self_attn.k_proj.bias"))
.ok()?;
let v_bias = st_model
.get_tensor_auto(&format!("{prefix}.self_attn.v_proj.bias"))
.ok()?;
let mut qkv_bias = Vec::with_capacity(hidden_dim + kv_dim + kv_dim);
qkv_bias.extend_from_slice(&q_bias);
qkv_bias.extend_from_slice(&k_bias);
qkv_bias.extend_from_slice(&v_bias);
Some(qkv_bias)
}
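/// Mapped-model wrapper over the generic dual-name QKV bias lookup.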
#[allow(dead_code)]
fn try_concat_qkv_bias_dual(
st_model: &MappedSafeTensorsModel,
hf_prefix: &str,
gguf_prefix: &str,
hidden_dim: usize,
kv_dim: usize,
) -> Option<Vec<f32>> {
Self::try_concat_qkv_bias_dual_generic(st_model, hf_prefix, gguf_prefix, hidden_dim, kv_dim)
}
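/// Concatenate QKV biases, trying HF names first and GGUF names as a fallback;
/// returns `None` if any of the three biases cannot be found under either name.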
fn try_concat_qkv_bias_dual_generic<S: TensorSource>(
source: &S,
hf_prefix: &str,
gguf_prefix: &str,
hidden_dim: usize,
kv_dim: usize,
) -> Option<Vec<f32>> {
let q_bias = source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.q_proj.bias"))
.ok()
.or_else(|| {
source
.get_tensor_auto(&format!("{gguf_prefix}.attn_q.bias"))
.ok()
})?;
let k_bias = source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.k_proj.bias"))
.ok()
.or_else(|| {
source
.get_tensor_auto(&format!("{gguf_prefix}.attn_k.bias"))
.ok()
})?;
let v_bias = source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.v_proj.bias"))
.ok()
.or_else(|| {
source
.get_tensor_auto(&format!("{gguf_prefix}.attn_v.bias"))
.ok()
})?;
let mut qkv_bias = Vec::with_capacity(hidden_dim + kv_dim + kv_dim);
qkv_bias.extend_from_slice(&q_bias);
qkv_bias.extend_from_slice(&k_bias);
qkv_bias.extend_from_slice(&v_bias);
Some(qkv_bias)
}
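/// Mapped-model wrapper over the generic HF/GGUF tensor lookup.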
#[allow(dead_code)]
fn get_tensor_with_fallback(
st_model: &MappedSafeTensorsModel,
hf_name: &str,
gguf_name: &str,
) -> Result<Vec<f32>> {
Self::get_tensor_with_fallback_generic(st_model, hf_name, gguf_name)
}
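/// Fetch a tensor by its HF name, then its GGUF name, then the HF name with the leading
/// `model.` stripped; errors with a sample of available tensor names if all lookups fail.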
fn get_tensor_with_fallback_generic<S: TensorSource>(
source: &S,
hf_name: &str,
gguf_name: &str,
) -> Result<Vec<f32>> {
if let Ok(t) = source.get_tensor_auto(hf_name) {
return Ok(t);
}
if let Ok(t) = source.get_tensor_auto(gguf_name) {
return Ok(t);
}
let bare_name = hf_name.strip_prefix("model.").unwrap_or(hf_name);
if bare_name != hf_name {
if let Ok(t) = source.get_tensor_auto(bare_name) {
return Ok(t);
}
}
let available = source.tensor_names();
let sample: Vec<&str> = available.iter().take(5).copied().collect();
Err(RealizarError::UnsupportedOperation {
operation: "get_tensor_auto".to_string(),
reason: format!(
"Tensor not found with names: '{}', '{}', or '{}'. \
Available tensors ({} total): {:?}{}",
hf_name,
gguf_name,
bare_name,
available.len(),
sample,
if available.len() > 5 { ", ..." } else { "" }
),
})
}
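/// Mapped-model wrapper over the generic HF/GGUF presence check.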
#[allow(dead_code)]
fn has_tensor_with_fallback(
st_model: &MappedSafeTensorsModel,
hf_name: &str,
gguf_name: &str,
) -> bool {
Self::has_tensor_with_fallback_generic(st_model, hf_name, gguf_name)
}
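/// True if the tensor exists under its HF name, GGUF name, or bare (un-prefixed) HF name.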
fn has_tensor_with_fallback_generic<S: TensorSource>(
source: &S,
hf_name: &str,
gguf_name: &str,
) -> bool {
if source.has_tensor(hf_name) || source.has_tensor(gguf_name) {
return true;
}
let bare_name = hf_name.strip_prefix("model.").unwrap_or(hf_name);
bare_name != hf_name && source.has_tensor(bare_name)
}
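/// Optional variant of the HF/GGUF lookup: returns `None` instead of an error when absent.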
#[allow(dead_code)]
fn get_optional_tensor_with_fallback(
st_model: &MappedSafeTensorsModel,
hf_name: &str,
gguf_name: &str,
) -> Option<Vec<f32>> {
st_model
.get_tensor_auto(hf_name)
.ok()
.or_else(|| st_model.get_tensor_auto(gguf_name).ok())
.or_else(|| {
let bare_name = hf_name.strip_prefix("model.")?;
st_model.get_tensor_auto(bare_name).ok()
})
}
}