impl SafetensorsToAprConverter {
pub fn convert(model_path: &Path) -> Result<ValidatedAprTransformer> {
let st_model = MappedSafeTensorsModel::load(model_path)?;
let config = SafetensorsConfig::load_from_sibling(model_path).ok_or_else(|| {
RealizarError::UnsupportedOperation {
operation: "safetensors_convert".to_string(),
reason: "config.json not found (required for SafeTensors inference)".to_string(),
}
})?;
Self::convert_from_source(&st_model, &config)
}
#[cfg(not(target_arch = "wasm32"))]
pub fn convert_sharded(
sharded: &ShardedSafeTensorsModel,
config: &SafetensorsConfig,
) -> Result<ValidatedAprTransformer> {
Self::convert_from_source(sharded, config)
}
fn convert_from_source<S: TensorSource>(
source: &S,
config: &SafetensorsConfig,
) -> Result<ValidatedAprTransformer> {
let hidden_dim = config
.hidden_size
.ok_or_else(|| RealizarError::FormatError {
reason: "config.json missing hidden_size".to_string(),
})?;
let num_layers = config
.num_hidden_layers
.ok_or_else(|| RealizarError::FormatError {
reason: "config.json missing num_hidden_layers".to_string(),
})?;
let num_heads = config
.num_attention_heads
.ok_or_else(|| RealizarError::FormatError {
reason: "config.json missing num_attention_heads".to_string(),
})?;
let num_kv_heads = config.num_kv_heads();
let vocab_size = config
.vocab_size
.ok_or_else(|| RealizarError::FormatError {
reason: "config.json missing vocab_size".to_string(),
})?;
let intermediate_dim = config.intermediate_size.unwrap_or(hidden_dim * 4);
let context_length = config.max_position_embeddings.unwrap_or(0);
let architecture = config.architecture();
let rope_theta = config.rope_theta.unwrap_or_else(||
crate::gguf::default_rope_theta_for_architecture(&architecture));
let eps = config.rms_norm_eps.unwrap_or(1e-6);
if let Err(e) = crate::gguf::ValidatedModelConfig::from_safetensors_config(config) {
eprintln!(
"[Phase2-WARN] SafeTensors config validation: {e} — proceeding with conversion"
);
}
if config.is_hybrid_attention() {
let layer_count = config.layer_types.as_ref().map_or(0, Vec::len);
let linear_count = config.layer_types.as_ref().map_or(0, |t| {
t.iter()
.filter(|l| *l == "linear" || *l == "linear_attention")
.count()
});
eprintln!(
"[GH-278] Hybrid attention model detected: {}/{} linear layers, head_dim={:?}",
linear_count,
layer_count,
config.head_dim,
);
}
let apr_config = AprTransformerConfig {
architecture,
hidden_dim,
num_layers,
num_heads,
num_kv_heads,
vocab_size,
intermediate_dim,
context_length,
rope_theta,
eps,
eos_token_id: config.eos_token_id,
explicit_head_dim: config.head_dim,
layer_types: config.layer_types.clone(),
linear_key_head_dim: config.linear_key_head_dim,
linear_value_head_dim: config.linear_value_head_dim,
linear_num_key_heads: config.linear_num_key_heads,
linear_num_value_heads: config.linear_num_value_heads,
linear_conv_kernel_dim: config.linear_conv_kernel_dim,
num_experts: config.num_experts,
num_experts_per_tok: config.num_experts_per_tok,
expert_intermediate_size: config.moe_intermediate_size,
};
let model_prefix = Self::detect_model_prefix(source);
let is_moe = config.num_experts.is_some();
let embed_name = format!("{model_prefix}.embed_tokens.weight");
let token_embedding = Self::get_tensor_with_fallback_generic(
source,
&embed_name,
"token_embd.weight",
)?;
let norm_name = format!("{model_prefix}.norm.weight");
let output_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&norm_name,
"output_norm.weight",
)?;
let use_tied_embeddings = config.tie_word_embeddings.unwrap_or(false);
let lm_head_weight = if use_tied_embeddings {
Self::transpose_weight(&token_embedding, vocab_size, hidden_dim)
} else if Self::has_tensor_with_fallback_generic(source, "lm_head.weight", "output.weight")
{
let raw =
Self::get_tensor_with_fallback_generic(source, "lm_head.weight", "output.weight")?;
Self::transpose_weight(&raw, vocab_size, hidden_dim)
} else {
Self::transpose_weight(&token_embedding, vocab_size, hidden_dim)
};
let mut layers = Vec::with_capacity(num_layers);
for i in 0..num_layers {
let is_linear = config
.layer_types
.as_ref()
.and_then(|lt| lt.get(i))
.is_some_and(|t| t == "linear" || t == "linear_attention");
let mut layer = if is_linear {
Self::extract_linear_layer_generic(
source,
i,
hidden_dim,
intermediate_dim,
config,
&model_prefix,
)?
} else {
Self::extract_layer_generic_with_prefix(
source,
i,
hidden_dim,
num_heads,
num_kv_heads,
intermediate_dim,
&model_prefix,
)?
};
if is_moe {
Self::load_moe_weights(source, i, &model_prefix, config, hidden_dim, &mut layer)?;
}
layers.push(layer);
}
let transformer = AprTransformer {
config: apr_config,
token_embedding,
layers,
output_norm_weight,
output_norm_bias: None,
lm_head_weight,
lm_head_bias: None,
q4k_layers: None,
lm_head_weight_q6k: None,
lm_head_weight_q4k: None,
};
ValidatedAprTransformer::validate(transformer).map_err(Into::into)
}
#[allow(dead_code)]
fn extract_layer(
st_model: &MappedSafeTensorsModel,
layer_idx: usize,
hidden_dim: usize,
num_heads: usize,
num_kv_heads: usize,
intermediate_dim: usize,
) -> Result<AprTransformerLayer> {
Self::extract_layer_generic(
st_model,
layer_idx,
hidden_dim,
num_heads,
num_kv_heads,
intermediate_dim,
)
}
fn extract_layer_generic<S: TensorSource>(
source: &S,
layer_idx: usize,
hidden_dim: usize,
num_heads: usize,
num_kv_heads: usize,
intermediate_dim: usize,
) -> Result<AprTransformerLayer> {
let hf_prefix = format!("model.layers.{layer_idx}");
let gguf_prefix = format!("blk.{layer_idx}");
let attn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.input_layernorm.weight"),
&format!("{gguf_prefix}.attn_norm.weight"),
)?;
let q_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.q_proj.weight"),
&format!("{gguf_prefix}.attn_q.weight"),
)?;
let k_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.k_proj.weight"),
&format!("{gguf_prefix}.attn_k.weight"),
)?;
let v_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.v_proj.weight"),
&format!("{gguf_prefix}.attn_v.weight"),
)?;
let head_dim = hidden_dim / num_heads;
let kv_dim = head_dim * num_kv_heads;
let qkv_weight =
Self::concat_qkv_transposed(&q_weight, &k_weight, &v_weight, hidden_dim, kv_dim);
let qkv_bias = Self::try_concat_qkv_bias_dual_generic(
source,
&hf_prefix,
&gguf_prefix,
hidden_dim,
kv_dim,
);
let attn_output_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.o_proj.weight"),
&format!("{gguf_prefix}.attn_output.weight"),
)?;
let attn_output_weight = Self::transpose_weight(&attn_output_raw, hidden_dim, hidden_dim);
let ffn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.post_attention_layernorm.weight"),
&format!("{gguf_prefix}.ffn_norm.weight"),
)?;
let ffn_gate_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.gate_proj.weight"),
&format!("{gguf_prefix}.ffn_gate.weight"),
)?;
let ffn_gate_weight = Self::transpose_weight(&ffn_gate_raw, intermediate_dim, hidden_dim);
let ffn_up_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.up_proj.weight"),
&format!("{gguf_prefix}.ffn_up.weight"),
)?;
let ffn_up_weight = Self::transpose_weight(&ffn_up_raw, intermediate_dim, hidden_dim);
let ffn_down_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.down_proj.weight"),
&format!("{gguf_prefix}.ffn_down.weight"),
)?;
let ffn_down_weight = Self::transpose_weight(&ffn_down_raw, hidden_dim, intermediate_dim);
Ok(AprTransformerLayer {
attn_norm_weight,
attn_norm_bias: None,
qkv_weight,
qkv_bias,
attn_output_weight,
attn_output_bias: None,
ffn_gate_weight: Some(ffn_gate_weight),
ffn_gate_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_norm_weight: Some(ffn_norm_weight),
ffn_norm_bias: None,
attn_q_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.q_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_q_norm.weight")))
.ok(),
attn_k_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.k_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_k_norm.weight")))
.ok(),
linear_attn_z_weight: None,
linear_attn_b_weight: None,
linear_attn_a_weight: None,
linear_attn_conv1d_weight: None,
linear_attn_a_log: None,
linear_attn_dt_bias: None,
linear_attn_norm_weight: None,
moe_gate_weight: None,
moe_expert_gate_up: None,
moe_expert_down: None,
moe_shared_gate: None,
moe_shared_up: None,
moe_shared_down: None,
moe_shared_expert_gate_weight: None,
})
}
fn extract_linear_layer_generic<S: TensorSource>(
source: &S,
layer_idx: usize,
hidden_dim: usize,
intermediate_dim: usize,
config: &SafetensorsConfig,
model_prefix: &str,
) -> Result<AprTransformerLayer> {
let hf_prefix = format!("{model_prefix}.layers.{layer_idx}");
let attn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.input_layernorm.weight"),
&format!("blk.{layer_idx}.attn_norm.weight"),
)?;
let key_head_dim = config.linear_key_head_dim.unwrap_or(128);
let value_head_dim = config.linear_value_head_dim.unwrap_or(128);
let num_key_heads = config.linear_num_key_heads.unwrap_or(16);
let num_value_heads = config.linear_num_value_heads.unwrap_or(32);
let key_dim = num_key_heads * key_head_dim;
let value_dim = num_value_heads * value_head_dim;
let combined_qkvz_name = format!("{hf_prefix}.self_attn.in_proj_qkvz.weight");
let separate_qkv_name = format!("{hf_prefix}.linear_attn.in_proj_qkv.weight");
let (qkv_weight, z_weight, b_weight, a_weight, attn_sub) =
if source.has_tensor(&combined_qkvz_name) {
let in_proj_qkvz = source.get_tensor_auto(&combined_qkvz_name)?;
let qkvz_out_dim = 2 * key_dim + 2 * value_dim;
let expected_qkvz = qkvz_out_dim * hidden_dim;
if in_proj_qkvz.len() != expected_qkvz {
return Err(RealizarError::FormatError {
reason: format!(
"GH-278: in_proj_qkvz size mismatch at layer {layer_idx}: \
expected {expected_qkvz}, got {}",
in_proj_qkvz.len()
),
});
}
let q_end = key_dim * hidden_dim;
let k_end = q_end + key_dim * hidden_dim;
let v_end = k_end + value_dim * hidden_dim;
let qkv = Self::concat_qkv(&in_proj_qkvz[..q_end], &in_proj_qkvz[q_end..k_end], &in_proj_qkvz[k_end..v_end]);
let z = in_proj_qkvz[v_end..].to_vec();
let in_proj_ba = source.get_tensor_auto(&format!("{hf_prefix}.self_attn.in_proj_ba.weight"))?;
let ba_split = num_value_heads * hidden_dim;
let b = in_proj_ba[..ba_split].to_vec();
let a = in_proj_ba[ba_split..].to_vec();
(qkv, z, b, a, "self_attn")
} else {
let in_proj_qkv = source.get_tensor_auto(&separate_qkv_name)?;
let qkv_out_dim = 2 * key_dim + value_dim;
let expected_qkv = qkv_out_dim * hidden_dim;
if in_proj_qkv.len() != expected_qkv {
return Err(RealizarError::FormatError {
reason: format!(
"ALB-010: in_proj_qkv size mismatch at layer {layer_idx}: \
expected {expected_qkv}, got {}",
in_proj_qkv.len()
),
});
}
let qkv = in_proj_qkv;
let z = source.get_tensor_auto(&format!("{hf_prefix}.linear_attn.in_proj_z.weight"))?;
let b = source.get_tensor_auto(&format!("{hf_prefix}.linear_attn.in_proj_b.weight"))?;
let a = source.get_tensor_auto(&format!("{hf_prefix}.linear_attn.in_proj_a.weight"))?;
(qkv, z, b, a, "linear_attn")
};
let out_proj_raw = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.out_proj.weight"))?;
let attn_output_weight = Self::transpose_weight(&out_proj_raw, hidden_dim, value_dim);
let conv1d_weight = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.conv1d.weight"))?;
let a_log = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.A_log"))?;
let dt_bias = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.dt_bias"))?;
let norm_weight = source
.get_tensor_auto(&format!("{hf_prefix}.{attn_sub}.norm.weight"))?;
let ffn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.post_attention_layernorm.weight"),
&format!("blk.{layer_idx}.ffn_norm.weight"),
)?;
let has_dense_ffn = source.has_tensor(&format!("{hf_prefix}.mlp.gate_proj.weight"))
|| source.has_tensor(&format!("blk.{layer_idx}.ffn_gate.weight"));
let (ffn_gate_weight, ffn_up_weight, ffn_down_weight) = if has_dense_ffn {
let gate_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.gate_proj.weight"),
&format!("blk.{layer_idx}.ffn_gate.weight"),
)?;
let up_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.up_proj.weight"),
&format!("blk.{layer_idx}.ffn_up.weight"),
)?;
let down_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.down_proj.weight"),
&format!("blk.{layer_idx}.ffn_down.weight"),
)?;
(
Some(Self::transpose_weight(&gate_raw, intermediate_dim, hidden_dim)),
Self::transpose_weight(&up_raw, intermediate_dim, hidden_dim),
Self::transpose_weight(&down_raw, hidden_dim, intermediate_dim),
)
} else {
(None, vec![0.0; intermediate_dim * hidden_dim], vec![0.0; hidden_dim * intermediate_dim])
};
Ok(AprTransformerLayer {
attn_norm_weight,
attn_norm_bias: None,
qkv_weight,
qkv_bias: None,
attn_output_weight,
attn_output_bias: None,
ffn_gate_weight,
ffn_gate_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_norm_weight: Some(ffn_norm_weight),
ffn_norm_bias: None,
attn_q_norm_weight: None,
attn_k_norm_weight: None,
linear_attn_z_weight: Some(z_weight),
linear_attn_b_weight: Some(b_weight),
linear_attn_a_weight: Some(a_weight),
linear_attn_conv1d_weight: Some(conv1d_weight),
linear_attn_a_log: Some(a_log),
linear_attn_dt_bias: Some(dt_bias),
linear_attn_norm_weight: Some(norm_weight),
moe_gate_weight: None,
moe_expert_gate_up: None,
moe_expert_down: None,
moe_shared_gate: None,
moe_shared_up: None,
moe_shared_down: None,
moe_shared_expert_gate_weight: None,
})
}
fn detect_model_prefix<S: TensorSource>(source: &S) -> String {
let names = source.tensor_names();
for name in &names {
if name.starts_with("model.language_model.") {
return "model.language_model".to_string();
}
}
"model".to_string()
}
fn extract_layer_generic_with_prefix<S: TensorSource>(
source: &S,
layer_idx: usize,
hidden_dim: usize,
num_heads: usize,
num_kv_heads: usize,
intermediate_dim: usize,
model_prefix: &str,
) -> Result<AprTransformerLayer> {
let hf_prefix = format!("{model_prefix}.layers.{layer_idx}");
let gguf_prefix = format!("blk.{layer_idx}");
let attn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.input_layernorm.weight"),
&format!("{gguf_prefix}.attn_norm.weight"),
)?;
let q_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.q_proj.weight"),
&format!("{gguf_prefix}.attn_q.weight"),
)?;
let k_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.k_proj.weight"),
&format!("{gguf_prefix}.attn_k.weight"),
)?;
let v_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.v_proj.weight"),
&format!("{gguf_prefix}.attn_v.weight"),
)?;
let head_dim = hidden_dim / num_heads;
let kv_dim = head_dim * num_kv_heads;
let qkv_weight =
Self::concat_qkv_transposed(&q_weight, &k_weight, &v_weight, hidden_dim, kv_dim);
let qkv_bias = Self::try_concat_qkv_bias_dual_generic(
source,
&hf_prefix,
&gguf_prefix,
hidden_dim,
kv_dim,
);
let attn_output_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.self_attn.o_proj.weight"),
&format!("{gguf_prefix}.attn_output.weight"),
)?;
let attn_output_weight = Self::transpose_weight(&attn_output_raw, hidden_dim, hidden_dim);
let ffn_norm_weight = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.post_attention_layernorm.weight"),
&format!("{gguf_prefix}.ffn_norm.weight"),
)?;
let has_dense_ffn = source.has_tensor(&format!("{hf_prefix}.mlp.gate_proj.weight"))
|| source.has_tensor(&format!("{gguf_prefix}.ffn_gate.weight"));
let (ffn_gate_weight, ffn_up_weight, ffn_down_weight) = if has_dense_ffn {
let gate_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.gate_proj.weight"),
&format!("{gguf_prefix}.ffn_gate.weight"),
)?;
let up_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.up_proj.weight"),
&format!("{gguf_prefix}.ffn_up.weight"),
)?;
let down_raw = Self::get_tensor_with_fallback_generic(
source,
&format!("{hf_prefix}.mlp.down_proj.weight"),
&format!("{gguf_prefix}.ffn_down.weight"),
)?;
(
Some(Self::transpose_weight(&gate_raw, intermediate_dim, hidden_dim)),
Self::transpose_weight(&up_raw, intermediate_dim, hidden_dim),
Self::transpose_weight(&down_raw, hidden_dim, intermediate_dim),
)
} else {
(None, vec![0.0; intermediate_dim * hidden_dim], vec![0.0; hidden_dim * intermediate_dim])
};
Ok(AprTransformerLayer {
attn_norm_weight,
attn_norm_bias: None,
qkv_weight,
qkv_bias,
attn_output_weight,
attn_output_bias: None,
ffn_gate_weight,
ffn_gate_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_norm_weight: Some(ffn_norm_weight),
ffn_norm_bias: None,
attn_q_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.q_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_q_norm.weight")))
.ok(),
attn_k_norm_weight: source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.k_norm.weight"))
.or_else(|_| source.get_tensor_auto(&format!("{gguf_prefix}.attn_k_norm.weight")))
.ok(),
linear_attn_z_weight: None,
linear_attn_b_weight: None,
linear_attn_a_weight: None,
linear_attn_conv1d_weight: None,
linear_attn_a_log: None,
linear_attn_dt_bias: None,
linear_attn_norm_weight: None,
moe_gate_weight: None,
moe_expert_gate_up: None,
moe_expert_down: None,
moe_shared_gate: None,
moe_shared_up: None,
moe_shared_down: None,
moe_shared_expert_gate_weight: None,
})
}
fn load_moe_weights<S: TensorSource>(
source: &S,
layer_idx: usize,
model_prefix: &str,
config: &SafetensorsConfig,
hidden_dim: usize,
layer: &mut AprTransformerLayer,
) -> Result<()> {
let prefix = format!("{model_prefix}.layers.{layer_idx}");
let gate_name = format!("{prefix}.mlp.gate.weight");
if let Ok(gate) = source.get_tensor_auto(&gate_name) {
layer.moe_gate_weight = Some(gate);
}
let gate_up_name = format!("{prefix}.mlp.experts.gate_up_proj");
if let Ok(gate_up) = source.get_tensor_auto(&gate_up_name) {
layer.moe_expert_gate_up = Some(gate_up);
let down_name = format!("{prefix}.mlp.experts.down_proj");
if let Ok(down) = source.get_tensor_auto(&down_name) {
layer.moe_expert_down = Some(down);
}
} else {
let num_experts = config.num_experts.unwrap_or(0);
let moe_intermediate = config.moe_intermediate_size.unwrap_or(0);
if num_experts > 0 && moe_intermediate > 0 {
let mut gate_up_packed = Vec::with_capacity(
num_experts * 2 * moe_intermediate * hidden_dim,
);
let mut down_packed = Vec::with_capacity(
num_experts * hidden_dim * moe_intermediate,
);
let mut found_any = false;
for e in 0..num_experts {
let gate_name = format!("{prefix}.mlp.experts.{e}.gate_proj.weight");
let up_name = format!("{prefix}.mlp.experts.{e}.up_proj.weight");
let down_name = format!("{prefix}.mlp.experts.{e}.down_proj.weight");
match (
source.get_tensor_auto(&gate_name),
source.get_tensor_auto(&up_name),
source.get_tensor_auto(&down_name),
) {
(Ok(gate), Ok(up), Ok(down)) => {
found_any = true;
gate_up_packed.extend_from_slice(&gate);
gate_up_packed.extend_from_slice(&up);
down_packed.extend_from_slice(&down);
}
_ => {
if found_any {
break;
}
}
}
}
if found_any {
layer.moe_expert_gate_up = Some(gate_up_packed);
layer.moe_expert_down = Some(down_packed);
}
}
}
let shared_intermediate = config.shared_expert_intermediate_size
.or(config.moe_intermediate_size)
.unwrap_or(0);
if shared_intermediate > 0 {
let shared_gate_name = format!("{prefix}.mlp.shared_expert.gate_proj.weight");
if let Ok(g) = source.get_tensor_auto(&shared_gate_name) {
layer.moe_shared_gate = Some(g);
}
let shared_up_name = format!("{prefix}.mlp.shared_expert.up_proj.weight");
if let Ok(u) = source.get_tensor_auto(&shared_up_name) {
layer.moe_shared_up = Some(u);
}
let shared_down_name = format!("{prefix}.mlp.shared_expert.down_proj.weight");
if let Ok(d) = source.get_tensor_auto(&shared_down_name) {
layer.moe_shared_down = Some(d);
}
}
let shared_gate_name = format!("{prefix}.mlp.shared_expert_gate.weight");
if let Ok(sg) = source.get_tensor_auto(&shared_gate_name) {
layer.moe_shared_expert_gate_weight = Some(sg);
}
Ok(())
}
#[allow(clippy::unused_self)]
pub fn transpose_weight(weight: &[f32], _out_dim: usize, _in_dim: usize) -> Vec<f32> {
weight.to_vec()
}
pub fn concat_qkv_transposed(
q: &[f32],
k: &[f32],
v: &[f32],
_hidden_dim: usize,
_kv_dim: usize,
) -> Vec<f32> {
let mut qkv = Vec::with_capacity(q.len() + k.len() + v.len());
qkv.extend_from_slice(q);
qkv.extend_from_slice(k);
qkv.extend_from_slice(v);
qkv
}
fn concat_qkv(q: &[f32], k: &[f32], v: &[f32]) -> Vec<f32> {
let mut qkv = Vec::with_capacity(q.len() + k.len() + v.len());
qkv.extend_from_slice(q);
qkv.extend_from_slice(k);
qkv.extend_from_slice(v);
qkv
}
#[allow(dead_code)]
fn try_concat_qkv_bias(
st_model: &MappedSafeTensorsModel,
prefix: &str,
hidden_dim: usize,
kv_dim: usize,
) -> Option<Vec<f32>> {
let q_bias = st_model
.get_tensor_auto(&format!("{prefix}.self_attn.q_proj.bias"))
.ok()?;
let k_bias = st_model
.get_tensor_auto(&format!("{prefix}.self_attn.k_proj.bias"))
.ok()?;
let v_bias = st_model
.get_tensor_auto(&format!("{prefix}.self_attn.v_proj.bias"))
.ok()?;
let mut qkv_bias = Vec::with_capacity(hidden_dim + kv_dim + kv_dim);
qkv_bias.extend_from_slice(&q_bias);
qkv_bias.extend_from_slice(&k_bias);
qkv_bias.extend_from_slice(&v_bias);
Some(qkv_bias)
}
#[allow(dead_code)]
fn try_concat_qkv_bias_dual(
st_model: &MappedSafeTensorsModel,
hf_prefix: &str,
gguf_prefix: &str,
hidden_dim: usize,
kv_dim: usize,
) -> Option<Vec<f32>> {
Self::try_concat_qkv_bias_dual_generic(st_model, hf_prefix, gguf_prefix, hidden_dim, kv_dim)
}
fn try_concat_qkv_bias_dual_generic<S: TensorSource>(
source: &S,
hf_prefix: &str,
gguf_prefix: &str,
hidden_dim: usize,
kv_dim: usize,
) -> Option<Vec<f32>> {
let q_bias = source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.q_proj.bias"))
.ok()
.or_else(|| {
source
.get_tensor_auto(&format!("{gguf_prefix}.attn_q.bias"))
.ok()
})?;
let k_bias = source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.k_proj.bias"))
.ok()
.or_else(|| {
source
.get_tensor_auto(&format!("{gguf_prefix}.attn_k.bias"))
.ok()
})?;
let v_bias = source
.get_tensor_auto(&format!("{hf_prefix}.self_attn.v_proj.bias"))
.ok()
.or_else(|| {
source
.get_tensor_auto(&format!("{gguf_prefix}.attn_v.bias"))
.ok()
})?;
let mut qkv_bias = Vec::with_capacity(hidden_dim + kv_dim + kv_dim);
qkv_bias.extend_from_slice(&q_bias);
qkv_bias.extend_from_slice(&k_bias);
qkv_bias.extend_from_slice(&v_bias);
Some(qkv_bias)
}
#[allow(dead_code)]
fn get_tensor_with_fallback(
st_model: &MappedSafeTensorsModel,
hf_name: &str,
gguf_name: &str,
) -> Result<Vec<f32>> {
Self::get_tensor_with_fallback_generic(st_model, hf_name, gguf_name)
}
fn get_tensor_with_fallback_generic<S: TensorSource>(
source: &S,
hf_name: &str,
gguf_name: &str,
) -> Result<Vec<f32>> {
if let Ok(t) = source.get_tensor_auto(hf_name) {
return Ok(t);
}
if let Ok(t) = source.get_tensor_auto(gguf_name) {
return Ok(t);
}
let bare_name = hf_name.strip_prefix("model.").unwrap_or(hf_name);
if bare_name != hf_name {
if let Ok(t) = source.get_tensor_auto(bare_name) {
return Ok(t);
}
}
let available = source.tensor_names();
let sample: Vec<&str> = available.iter().take(5).copied().collect();
Err(RealizarError::UnsupportedOperation {
operation: "get_tensor_auto".to_string(),
reason: format!(
"Tensor not found with names: '{}', '{}', or '{}'. \
Available tensors ({} total): {:?}{}",
hf_name,
gguf_name,
bare_name,
available.len(),
sample,
if available.len() > 5 { ", ..." } else { "" }
),
})
}
#[allow(dead_code)]
fn has_tensor_with_fallback(
st_model: &MappedSafeTensorsModel,
hf_name: &str,
gguf_name: &str,
) -> bool {
Self::has_tensor_with_fallback_generic(st_model, hf_name, gguf_name)
}
fn has_tensor_with_fallback_generic<S: TensorSource>(
source: &S,
hf_name: &str,
gguf_name: &str,
) -> bool {
if source.has_tensor(hf_name) || source.has_tensor(gguf_name) {
return true;
}
let bare_name = hf_name.strip_prefix("model.").unwrap_or(hf_name);
bare_name != hf_name && source.has_tensor(bare_name)
}
#[allow(dead_code)]
fn get_optional_tensor_with_fallback(
st_model: &MappedSafeTensorsModel,
hf_name: &str,
gguf_name: &str,
) -> Option<Vec<f32>> {
st_model
.get_tensor_auto(hf_name)
.ok()
.or_else(|| st_model.get_tensor_auto(gguf_name).ok())
.or_else(|| {
let bare_name = hf_name.strip_prefix("model.")?;
st_model.get_tensor_auto(bare_name).ok()
})
}
}