use crate::error::{AprenderError, Result};
use crate::format::converter_types::{Architecture, QuantizationType};
use crate::format::gguf::GgufReader;
use crate::format::layout_contract::contract;
use crate::serialization::safetensors::{
save_safetensors, save_safetensors_typed, save_safetensors_with_metadata,
save_safetensors_with_metadata_typed, UserMetadata,
};
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;
use super::{
calculate_tensor_size, load_model_tensors_provenance, map_tensor_names, quantize_tensors,
NativeF32Tensors,
};
fn resolve_architecture(apr_metadata: &crate::format::v2::AprV2Metadata) -> &str {
apr_metadata
.architecture
.as_deref()
.or_else(|| {
let mt = &apr_metadata.model_type;
if mt.is_empty() || mt == "unknown" {
None
} else {
Some(mt.as_str())
}
})
.unwrap_or("unknown")
}
pub(crate) fn default_rope_theta_for_architecture(arch: &str) -> f32 {
let arch_lower = arch.to_lowercase();
if arch_lower.contains("qwen") {
1_000_000.0
} else {
10_000.0
}
}
fn resolve_pre_tokenizer_type(arch: &str, model_name: &str) -> &'static str {
let name_lower = model_name.to_lowercase();
if name_lower.contains("smollm") {
return "default";
}
match arch {
"gpt2" => "gpt-2",
"qwen2" | "qwen2.5" | "qwen" => "qwen2",
"llama" | "mistral" => "default",
"phi" | "phi3" => "default",
"gemma" | "gemma2" => "default",
"deepseek" | "deepseek2" => "deepseek",
"starcoder" | "starcoder2" => "starcoder",
_ => "default", }
}
fn uses_rope(arch: &str) -> bool {
!matches!(arch, "gpt2" | "starcoder")
}
struct GgufNameMapper {
global: std::collections::HashMap<String, String>,
per_layer: std::collections::HashMap<String, String>,
skip_suffixes: std::collections::HashSet<String>,
transpose_weights: bool,
fusion_rules: Vec<FusionExportRule>,
}
struct FusionExportRule {
gguf_suffix: String,
apr_suffixes: Vec<String>,
}
fn build_global_mappings(
tt: &crate::format::model_family::TensorTemplate,
gt: &crate::format::model_family::GgufTensorTemplate,
) -> std::collections::HashMap<String, String> {
let mut global = std::collections::HashMap::new();
if let Some(gguf_name) = >.embedding {
global.insert(tt.embedding.clone(), gguf_name.clone());
}
if let (Some(apr_name), Some(gguf_name)) = (&tt.lm_head, >.lm_head) {
global.insert(apr_name.clone(), gguf_name.clone());
}
if let (Some(apr_name), Some(gguf_name)) = (&tt.final_norm, >.final_norm_weight) {
global.insert(apr_name.clone(), gguf_name.clone());
}
if let (Some(apr_norm), Some(gguf_bias)) = (&tt.final_norm, >.final_norm_bias) {
let apr_bias = apr_norm.replace(".weight", ".bias");
global.insert(apr_bias, gguf_bias.clone());
}
if let Some(gguf_name) = >.position_embedding {
if let Some(Some(apr_name)) = tt.per_layer.get("position_embedding") {
global.insert(apr_name.clone(), gguf_name.clone());
} else {
global.insert(
"model.position_embedding.weight".to_string(),
gguf_name.clone(),
);
}
}
global
}
fn resolve_fusion_rules(
tt: &crate::format::model_family::TensorTemplate,
gt: &crate::format::model_family::GgufTensorTemplate,
) -> Vec<FusionExportRule> {
gt.fuse
.iter()
.filter_map(|rule| {
let apr_suffixes: Vec<String> = rule
.source_roles
.iter()
.filter_map(|role| {
tt.per_layer.get(role.as_str()).and_then(|opt| {
opt.as_ref().map(|pat| {
pat.strip_prefix("model.layers.{n}.")
.unwrap_or(pat)
.to_string()
})
})
})
.collect();
if apr_suffixes.len() == rule.source_roles.len() {
Some(FusionExportRule {
gguf_suffix: rule.gguf_suffix.clone(),
apr_suffixes,
})
} else {
eprintln!(
"[GH-277] WARNING: Fusion rule `{}` has unresolved source roles, skipping",
rule.gguf_suffix
);
None
}
})
.collect()
}
impl GgufNameMapper {
fn from_contract(config: &crate::format::model_family::ModelFamilyConfig) -> Self {
let tt = &config.tensor_template;
let gt = &config.gguf_tensor_template;
let global = build_global_mappings(tt, gt);
let mut per_layer = std::collections::HashMap::new();
let mut skip_suffixes = std::collections::HashSet::new();
for (role, apr_pattern_opt) in &tt.per_layer {
let Some(apr_pattern) = apr_pattern_opt else {
continue;
};
if role == "position_embedding" {
continue;
}
let apr_suffix = apr_pattern
.strip_prefix("model.layers.{n}.")
.unwrap_or(apr_pattern);
match gt.per_layer.get(role) {
Some(Some(gguf_suffix)) => {
per_layer.insert(apr_suffix.to_string(), gguf_suffix.clone());
}
Some(None) => {
skip_suffixes.insert(apr_suffix.to_string());
}
None => {}
}
}
let fusion_rules = resolve_fusion_rules(tt, gt);
Self {
global,
per_layer,
skip_suffixes,
transpose_weights: gt.transpose_weights,
fusion_rules,
}
}
fn map_name(&self, apr_name: &str) -> Option<String> {
if let Some(gguf_name) = self.global.get(apr_name) {
return Some(gguf_name.clone());
}
if let Some(rest) = apr_name.strip_prefix("model.layers.") {
if let Some(dot_pos) = rest.find('.') {
let layer_num = &rest[..dot_pos];
let suffix = &rest[dot_pos + 1..];
if self.skip_suffixes.contains(suffix) {
return None;
}
if let Some(gguf_suffix) = self.per_layer.get(suffix) {
return Some(format!("blk.{layer_num}.{gguf_suffix}"));
}
}
}
eprintln!(
"[GH-277] WARNING: No GGUF mapping for tensor '{}', passing through",
apr_name
);
Some(apr_name.to_string())
}
fn fusion_rules(&self) -> &[FusionExportRule] {
&self.fusion_rules
}
fn needs_transpose(&self) -> bool {
self.transpose_weights
}
}
fn build_gguf_mapper(arch: &str) -> GgufNameMapper {
let registry = crate::format::model_family::build_default_registry();
let family = registry
.get(arch)
.or_else(|| registry.detect_from_model_type(arch));
match family {
Some(f) => {
let config = f.config();
if config.gguf_tensor_template.embedding.is_some() {
eprintln!(
"[GH-277] Using contract-driven GGUF mapping for family '{}'",
config.family
);
GgufNameMapper::from_contract(config)
} else {
eprintln!(
"[GH-277] Family '{}' has no gguf_tensor_template, using legacy mapping",
config.family
);
build_legacy_mapper()
}
}
None => {
eprintln!(
"[GH-277] No family contract for arch '{}', using legacy mapping",
arch
);
build_legacy_mapper()
}
}
}
fn build_legacy_mapper() -> GgufNameMapper {
let mut global = std::collections::HashMap::new();
global.insert(
"model.embed_tokens.weight".to_string(),
"token_embd.weight".to_string(),
);
global.insert("lm_head.weight".to_string(), "output.weight".to_string());
global.insert(
"model.norm.weight".to_string(),
"output_norm.weight".to_string(),
);
let mut per_layer = std::collections::HashMap::new();
per_layer.insert(
"self_attn.q_proj.weight".to_string(),
"attn_q.weight".to_string(),
);
per_layer.insert(
"self_attn.q_proj.bias".to_string(),
"attn_q.bias".to_string(),
);
per_layer.insert(
"self_attn.k_proj.weight".to_string(),
"attn_k.weight".to_string(),
);
per_layer.insert(
"self_attn.k_proj.bias".to_string(),
"attn_k.bias".to_string(),
);
per_layer.insert(
"self_attn.v_proj.weight".to_string(),
"attn_v.weight".to_string(),
);
per_layer.insert(
"self_attn.v_proj.bias".to_string(),
"attn_v.bias".to_string(),
);
per_layer.insert(
"self_attn.o_proj.weight".to_string(),
"attn_output.weight".to_string(),
);
per_layer.insert(
"self_attn.o_proj.bias".to_string(),
"attn_output.bias".to_string(),
);
per_layer.insert(
"self_attn.qkv_proj.weight".to_string(),
"attn_qkv.weight".to_string(),
);
per_layer.insert(
"self_attn.qkv_proj.bias".to_string(),
"attn_qkv.bias".to_string(),
);
per_layer.insert(
"input_layernorm.weight".to_string(),
"attn_norm.weight".to_string(),
);
per_layer.insert(
"self_attn.q_norm.weight".to_string(),
"attn_q_norm.weight".to_string(),
);
per_layer.insert(
"self_attn.k_norm.weight".to_string(),
"attn_k_norm.weight".to_string(),
);
per_layer.insert(
"mlp.gate_proj.weight".to_string(),
"ffn_gate.weight".to_string(),
);
per_layer.insert(
"mlp.up_proj.weight".to_string(),
"ffn_up.weight".to_string(),
);
per_layer.insert(
"mlp.down_proj.weight".to_string(),
"ffn_down.weight".to_string(),
);
per_layer.insert(
"post_attention_layernorm.weight".to_string(),
"ffn_norm.weight".to_string(),
);
GgufNameMapper {
global,
per_layer,
skip_suffixes: std::collections::HashSet::new(),
transpose_weights: false,
fusion_rules: Vec::new(),
}
}
fn detect_num_layers_from_names<'a>(names: impl Iterator<Item = &'a str>) -> usize {
let mut max_layer = 0usize;
for name in names {
if let Some(rest) = name.strip_prefix("model.layers.") {
if let Some(dot_pos) = rest.find('.') {
if let Ok(n) = rest[..dot_pos].parse::<usize>() {
max_layer = max_layer.max(n + 1);
}
}
}
}
max_layer
}
fn transpose_2d_f32(data: &[f32], rows: usize, cols: usize) -> Vec<f32> {
contract_pre_transpose_involution!(data);
let mut out = vec![0.0f32; rows * cols];
trueno::blis::transpose::transpose(rows, cols, data, &mut out)
.expect("transpose_2d_f32: dimension mismatch");
out
}
fn compute_fused_shape(shapes: &[Vec<usize>]) -> Option<Vec<usize>> {
let first = shapes.first()?;
match first.len() {
2 => {
let total_rows: usize = shapes.iter().map(|s| s[0]).sum();
Some(vec![total_rows, first[1]])
}
1 => {
let total: usize = shapes.iter().map(|s| s[0]).sum();
Some(vec![total])
}
_ => None,
}
}
#[provable_contracts_macros::ensures(ret.len() == shape.len())]
fn shape_to_gguf(shape: &[usize]) -> Vec<u64> {
if shape.len() == 2 {
vec![shape[1] as u64, shape[0] as u64]
} else {
shape.iter().map(|&d| d as u64).collect()
}
}
include!("export_include.rs");
include!("fusion.rs");
include!("apr_export_fn.rs");