/// Build the architecture-level GGUF metadata (context length, embedding size,
/// block count, attention head counts, norm epsilon, RoPE parameters) from APR
/// v2 metadata. Panics with a C-07 message if a required field is missing.
fn build_gguf_arch_metadata(
    apr_metadata: &crate::format::v2::AprV2Metadata,
) -> Vec<(String, crate::format::gguf::GgufValue)> {
use crate::format::gguf::GgufValue;
let arch = resolve_architecture(apr_metadata);
let hidden_size = apr_metadata
.hidden_size
.expect("C-07: hidden_size required for GGUF export (missing in APR metadata)");
let num_layers = apr_metadata
.num_layers
.expect("C-07: num_layers required for GGUF export (missing in APR metadata)");
let num_heads = apr_metadata
.num_heads
.expect("C-07: num_heads required for GGUF export (missing in APR metadata)");
let num_kv_heads = apr_metadata.num_kv_heads.unwrap_or(num_heads);
let vocab_size = apr_metadata
.vocab_size
.expect("C-07: vocab_size required for GGUF export (missing in APR metadata)");
let intermediate_size = apr_metadata
.intermediate_size
.expect("C-07: intermediate_size required for GGUF export (missing in APR metadata)");
let max_pos = apr_metadata.max_position_embeddings.unwrap_or(0);
    let rope_theta = apr_metadata
        .rope_theta
        .unwrap_or_else(|| super::export::default_rope_theta_for_architecture(arch));
let rms_norm_eps = apr_metadata.rms_norm_eps.unwrap_or(1e-6);
let head_dim = if num_heads > 0 {
hidden_size / num_heads
} else {
0
};
let model_name = apr_metadata
.name
.clone()
.unwrap_or_else(|| "model".to_string());
let mut metadata = vec![
(
"general.architecture".to_string(),
GgufValue::String(arch.to_string()),
),
("general.name".to_string(), GgufValue::String(model_name)),
(
"general.quantization_version".to_string(),
GgufValue::Uint32(2),
),
("general.file_type".to_string(), GgufValue::Uint32(0)),
(
format!("{arch}.context_length"),
GgufValue::Uint32(max_pos as u32),
),
(
format!("{arch}.embedding_length"),
GgufValue::Uint32(hidden_size as u32),
),
(
format!("{arch}.block_count"),
GgufValue::Uint32(num_layers as u32),
),
(
format!("{arch}.feed_forward_length"),
GgufValue::Uint32(intermediate_size as u32),
),
(
format!("{arch}.attention.head_count"),
GgufValue::Uint32(num_heads as u32),
),
(
format!("{arch}.attention.head_count_kv"),
GgufValue::Uint32(num_kv_heads as u32),
),
];
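    // GPT-2 models use classic LayerNorm, so the epsilon is written under
    // `layer_norm_epsilon`; the other architectures handled here use RMSNorm
    // and expect `layer_norm_rms_epsilon`.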
if arch == "gpt2" {
metadata.push((
format!("{arch}.attention.layer_norm_epsilon"),
GgufValue::Float32(rms_norm_eps),
));
} else {
metadata.push((
format!("{arch}.attention.layer_norm_rms_epsilon"),
GgufValue::Float32(rms_norm_eps),
));
}
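    // Rotary-embedding architectures additionally need the RoPE dimension count
    // (head_dim, derived above as hidden_size / num_heads) and the frequency base.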
if uses_rope(arch) {
metadata.push((
format!("{arch}.rope.dimension_count"),
GgufValue::Uint32(head_dim as u32),
));
metadata.push((
format!("{arch}.rope.freq_base"),
GgufValue::Float32(rope_theta),
));
}
metadata.push((
format!("{arch}.vocab_size"),
GgufValue::Uint32(vocab_size as u32),
));
metadata
}
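/// Copy a JSON string array from the APR `custom` metadata map into the GGUF
/// entry list under `gguf_key`. Non-string elements are skipped; nothing is
/// pushed if the source key is absent or yields no strings.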
fn push_string_array(
entries: &mut Vec<(String, crate::format::gguf::GgufValue)>,
custom: &std::collections::HashMap<String, serde_json::Value>,
src_key: &str,
gguf_key: &str,
) {
let arr = custom.get(src_key).and_then(|v| v.as_array());
let Some(arr) = arr else { return };
let strings: Vec<String> = arr
.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect();
if !strings.is_empty() {
entries.push((
gguf_key.to_string(),
crate::format::gguf::GgufValue::ArrayString(strings),
));
}
}
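/// Copy a single unsigned integer from the APR `custom` metadata map into the
/// GGUF entry list as a `Uint32`. Missing or non-integer values are ignored.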
fn push_u32_field(
entries: &mut Vec<(String, crate::format::gguf::GgufValue)>,
custom: &std::collections::HashMap<String, serde_json::Value>,
src_key: &str,
gguf_key: &str,
) {
if let Some(val) = custom.get(src_key).and_then(|v| v.as_u64()) {
entries.push((
gguf_key.to_string(),
crate::format::gguf::GgufValue::Uint32(val as u32),
));
}
}
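/// Copy a JSON integer array from the APR `custom` metadata map into the GGUF
/// entry list as an `ArrayInt32` (used here for per-token type flags). Nothing
/// is pushed if the source key is absent or yields no integers.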
fn push_i32_array(
entries: &mut Vec<(String, crate::format::gguf::GgufValue)>,
custom: &std::collections::HashMap<String, serde_json::Value>,
src_key: &str,
gguf_key: &str,
) {
let arr = custom.get(src_key).and_then(|v| v.as_array());
let Some(arr) = arr else { return };
let types: Vec<i32> = arr
.iter()
.filter_map(|v| v.as_i64().map(|n| n as i32))
.collect();
if !types.is_empty() {
entries.push((
gguf_key.to_string(),
crate::format::gguf::GgufValue::ArrayInt32(types),
));
}
}
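/// Translate tokenizer information stored in APR metadata (vocabulary, merges,
/// special token ids, pre-tokenizer type, chat template) into the corresponding
/// `tokenizer.ggml.*` and `tokenizer.chat_template` GGUF keys.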
fn extract_apr_tokenizer_for_gguf(
apr_metadata: &crate::format::v2::AprV2Metadata,
) -> Vec<(String, crate::format::gguf::GgufValue)> {
use crate::format::gguf::GgufValue;
let mut entries = Vec::new();
let custom = &apr_metadata.custom;
let arch = resolve_architecture(apr_metadata);
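    // GGUF identifies BPE tokenizers with the model string "gpt2"; map the APR
    // value "bpe" accordingly and pass any other value through unchanged.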
let raw_model_type = custom
.get("tokenizer.model")
.and_then(|v| v.as_str())
.unwrap_or("gpt2");
let model_type = match raw_model_type {
"bpe" => "gpt2",
other => other,
};
entries.push((
"tokenizer.ggml.model".to_string(),
GgufValue::String(model_type.to_string()),
));
let model_name = apr_metadata.name.as_deref().unwrap_or("");
let pre_type = custom
.get("tokenizer.pre_type")
.and_then(|v| v.as_str())
.unwrap_or_else(|| resolve_pre_tokenizer_type(arch, model_name));
entries.push((
"tokenizer.ggml.pre".to_string(),
GgufValue::String(pre_type.to_string()),
));
push_string_array(
&mut entries,
custom,
"tokenizer.vocabulary",
"tokenizer.ggml.tokens",
);
push_string_array(
&mut entries,
custom,
"tokenizer.merges",
"tokenizer.ggml.merges",
);
push_u32_field(
&mut entries,
custom,
"tokenizer.bos_token_id",
"tokenizer.ggml.bos_token_id",
);
push_u32_field(
&mut entries,
custom,
"tokenizer.eos_token_id",
"tokenizer.ggml.eos_token_id",
);
push_i32_array(
&mut entries,
custom,
"tokenizer.token_type",
"tokenizer.ggml.token_type",
);
push_u32_field(
&mut entries,
custom,
"tokenizer.padding_token_id",
"tokenizer.ggml.padding_token_id",
);
if let Some(add_bos) = custom
.get("tokenizer.add_bos_token")
.and_then(|v| v.as_bool())
{
entries.push((
"tokenizer.ggml.add_bos_token".to_string(),
GgufValue::Bool(add_bos),
));
}
let chat_tmpl = apr_metadata.chat_template.as_deref().or_else(|| {
custom
.get("tokenizer.chat_template")
.and_then(|v| v.as_str())
});
if let Some(tmpl) = chat_tmpl {
entries.push((
"tokenizer.chat_template".to_string(),
GgufValue::String(tmpl.to_string()),
));
}
entries
}
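/// Export tensors as an MLX-style directory: `model.safetensors` (carrying any
/// user metadata from the source APR file), an inferred `config.json`, and,
/// when requested, a `tokenizer.json`. Tokenizer write failures are reported
/// as warnings rather than errors.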
fn export_mlx(
tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
input_path: &Path,
output_path: &Path,
options: &ExportOptions,
) -> Result<()> {
fs::create_dir_all(output_path).map_err(|e| AprenderError::FormatError {
message: format!("Failed to create MLX output directory: {e}"),
})?;
let weights_path = output_path.join("model.safetensors");
let user_metadata = extract_user_metadata(input_path);
if user_metadata.is_empty() {
save_safetensors(&weights_path, tensors).map_err(|e| AprenderError::FormatError {
message: format!("Failed to write MLX weights: {e}"),
})?;
} else {
save_safetensors_with_metadata(&weights_path, tensors, &user_metadata).map_err(|e| {
AprenderError::FormatError {
message: format!("Failed to write MLX weights: {e}"),
}
})?;
}
let config = infer_model_config(tensors);
let config_path = output_path.join("config.json");
fs::write(&config_path, config).map_err(|e| AprenderError::FormatError {
message: format!("Failed to write MLX config.json: {e}"),
})?;
if options.include_tokenizer {
let tokenizer_json = infer_tokenizer_json(input_path);
if !tokenizer_json.is_empty() {
let tokenizer_path = output_path.join("tokenizer.json");
if let Err(e) = fs::write(&tokenizer_path, &tokenizer_json) {
eprintln!("[GH-246] Warning: Failed to write tokenizer.json: {e}");
}
}
}
Ok(())
}
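/// Export an APR v2 file to GGUF without re-quantizing: tensor bytes are copied
/// verbatim and only names, shapes, and dtypes are translated. Tensors whose
/// name has no GGUF mapping are skipped, and dtypes without a GGUF equivalent
/// abort the export with a hint to run `apr convert` first.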
fn export_apr_to_gguf_raw(input: &Path, output: &Path) -> Result<ExportReport> {
use crate::format::gguf::{export_tensors_to_gguf, GgmlType, GgufTensor};
use crate::format::v2::{AprV2Reader, TensorDType};
use std::fs::File;
use std::io::BufWriter;
let data = fs::read(input).map_err(|e| AprenderError::FormatError {
message: format!("Failed to read APR file: {e}"),
})?;
let original_size = data.len();
let reader = AprV2Reader::from_bytes(&data).map_err(|e| AprenderError::FormatError {
message: format!("Failed to parse APR file: {e:?}"),
})?;
let apr_metadata = reader.metadata().clone();
let arch = resolve_architecture(&apr_metadata);
let num_layers = apr_metadata
.num_layers
.expect("C-07: num_layers required for GGUF export");
let num_heads = apr_metadata
.num_heads
.expect("C-07: num_heads required for GGUF export");
let num_kv_heads = apr_metadata.num_kv_heads.unwrap_or(num_heads);
let hidden_size = apr_metadata
.hidden_size
.expect("C-07: hidden_size required for GGUF export");
let mut metadata = build_gguf_arch_metadata(&apr_metadata);
metadata.extend(extract_apr_tokenizer_for_gguf(&apr_metadata));
let validated = ValidatedGgufMetadata::validate(metadata)?;
eprintln!(
"[PMAT-252] Writing {} metadata keys (arch={}, layers={}, heads={}/{}kv, hidden={})",
validated.as_slice().len(),
arch,
num_layers,
num_heads,
num_kv_heads,
hidden_size
);
let mapper = build_gguf_mapper(arch);
let tensor_names = reader.tensor_names();
let mut gguf_tensors = Vec::with_capacity(tensor_names.len());
for name in &tensor_names {
let Some(gguf_name) = mapper.map_name(name) else {
eprintln!("[GH-277] Skipping tensor '{}' (not in GGUF contract)", name);
continue;
};
let entry = reader
.get_tensor(name)
.ok_or_else(|| AprenderError::FormatError {
message: format!("Tensor '{}' missing from index", name),
})?;
let raw_bytes = reader
.get_tensor_data(name)
.ok_or_else(|| AprenderError::FormatError {
message: format!("Tensor '{}' data not found", name),
})?;
let gguf_dtype = match entry.dtype {
TensorDType::F32 => GgmlType::F32,
TensorDType::F16 => GgmlType::F16,
TensorDType::Q4K => GgmlType::Q4K,
TensorDType::Q6K => GgmlType::Q6K,
TensorDType::AprQ8 => GgmlType::Q8_0,
TensorDType::BF16 | TensorDType::F64 | TensorDType::I32
| TensorDType::I64 | TensorDType::I8 | TensorDType::U8
| TensorDType::AprQ4 => {
return Err(AprenderError::FormatError {
message: format!(
"Tensor '{}' has dtype {:?} which has no GGUF equivalent. \
Convert to F32/F16 first with `apr convert`.",
name, entry.dtype
),
});
}
};
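        // GGUF lists dimensions innermost-first, so 2-D (row-major) shapes are
        // written in reverse order; other ranks are passed through as-is.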
let gguf_shape = if entry.shape.len() == 2 {
vec![entry.shape[1] as u64, entry.shape[0] as u64]
} else {
entry.shape.iter().map(|&d| d as u64).collect()
};
eprintln!(
"[PMAT-252] '{}': {} bytes (dtype={:?})",
gguf_name,
raw_bytes.len(),
entry.dtype
);
gguf_tensors.push(GgufTensor {
name: gguf_name,
shape: gguf_shape,
dtype: gguf_dtype,
data: raw_bytes.to_vec(),
});
}
let fused = build_fused_tensors_raw(&mapper, &reader);
gguf_tensors.extend(fused);
let file = File::create(output).map_err(|e| AprenderError::FormatError {
message: format!("Failed to create output file: {e}"),
})?;
let mut writer = BufWriter::new(file);
export_tensors_to_gguf(&mut writer, &gguf_tensors, validated.as_slice())?;
let exported_size = fs::metadata(output).map(|m| m.len() as usize).unwrap_or(0);
Ok(ExportReport {
original_size,
exported_size,
tensor_count: gguf_tensors.len(),
format: ExportFormat::Gguf,
quantization: Some(QuantizationType::Q4K),
})
}
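/// Test-only helper: map a Hugging Face tensor name to its GGUF counterpart via
/// the legacy mapper, falling back to the original name when no mapping exists.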
#[cfg(test)]
fn hf_to_gguf_name(name: &str) -> String {
let mapper = build_legacy_mapper();
mapper.map_name(name).unwrap_or_else(|| name.to_string())
}