/// Maps a GGML quantization type id to its APR dtype string, falling back to
/// "F32" for unknown ids.
fn apr_qtype_to_dtype(qtype: u32) -> &'static str {
    crate::gguf::GgmlQuantType::from_id(qtype).map_or("F32", crate::gguf::GgmlQuantType::as_str)
}
/// Maps an APR dtype string to its single-byte on-disk encoding, warning and
/// falling back to 0 (F32) for unknown dtypes.
fn apr_dtype_to_byte(dtype: &str) -> u8 {
    crate::gguf::GgmlQuantType::from_str_lossy(dtype).map_or_else(
        || {
            eprintln!("WARN: Unknown dtype '{dtype}' in dtype_to_byte, writing as F32");
            0
        },
        crate::gguf::GgmlQuantType::as_byte,
    )
}
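// Illustrative round-trip sketch (an assumption about the GGML id table,
// where type id 0 is F32, as in ggml's tensor-type enumeration):
//
//     assert_eq!(apr_qtype_to_dtype(0), "F32");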
/// Serializes one tensor index entry:
/// `[name_len: u16][name: bytes][dtype: u8][rank: u8][dims: u64 each][offset: u64][size: u64]`,
/// all integers little-endian. `offset` is relative to the start of the
/// tensor data section.
fn write_apr_tensor_entry(
    name: &str,
    dtype: &str,
    shape: &[usize],
    offset: u64,
    size: u64,
) -> Vec<u8> {
    let mut entry = Vec::new();
    let name_bytes = name.as_bytes();
    entry.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
    entry.extend_from_slice(name_bytes);
    entry.push(apr_dtype_to_byte(dtype));
    entry.push(shape.len() as u8);
    for &dim in shape {
        entry.extend_from_slice(&(dim as u64).to_le_bytes());
    }
    entry.extend_from_slice(&offset.to_le_bytes());
    entry.extend_from_slice(&size.to_le_bytes());
    entry
}
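// A minimal sketch of a layout check for the entry encoding above; the test
// module and names are illustrative, not part of an existing suite. The
// dtype byte is skipped rather than asserted because its value comes from
// `GgmlQuantType::as_byte`.
#[cfg(test)]
mod apr_tensor_entry_layout_tests {
    use super::*;

    #[test]
    fn entry_fields_round_trip() {
        let entry = write_apr_tensor_entry("blk.0.attn_q.weight", "F32", &[32, 64], 128, 8192);
        let name_len = u16::from_le_bytes([entry[0], entry[1]]) as usize;
        assert_eq!(name_len, 19);
        assert_eq!(&entry[2..2 + name_len], b"blk.0.attn_q.weight");
        let mut pos = 2 + name_len + 1; // skip the dtype byte
        assert_eq!(entry[pos], 2); // rank
        pos += 1;
        let read_u64 = |p: usize| u64::from_le_bytes(entry[p..p + 8].try_into().unwrap());
        assert_eq!(read_u64(pos), 32); // dim 0
        assert_eq!(read_u64(pos + 8), 64); // dim 1
        assert_eq!(read_u64(pos + 16), 128); // offset
        assert_eq!(read_u64(pos + 24), 8192); // size
    }
}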
impl OwnedQuantizedModel {
    /// Serializes the model to the APR container format: fixed-size header,
    /// alignment-padded JSON metadata, tensor index, then the aligned tensor
    /// data section.
    #[allow(clippy::disallowed_methods)]
    #[allow(clippy::cast_possible_truncation)]
    pub fn to_apr_bytes(&self) -> Result<Vec<u8>> {
        use crate::apr::{ALIGNMENT, HEADER_SIZE, MAGIC};
        let tensors = self.collect_apr_model_tensors();
        let metadata = serde_json::json!({
            "model_type": "transformer_lm",
            "architecture": self.config.architecture,
            "vocab_size": self.config.vocab_size,
            "hidden_size": self.config.hidden_dim,
            "num_layers": self.config.num_layers,
            "num_heads": self.config.num_heads,
            "num_kv_heads": self.config.num_kv_heads,
            "intermediate_size": self.config.intermediate_dim,
            "rms_norm_eps": self.config.eps,
            "rope_theta": self.config.rope_theta,
            "context_length": self.config.context_length,
        });
        let metadata_bytes =
            serde_json::to_vec(&metadata).map_err(|e| RealizarError::FormatError {
                reason: format!("Failed to serialize metadata: {e}"),
            })?;
        let metadata_padded_len = metadata_bytes.len().div_ceil(ALIGNMENT) * ALIGNMENT;
        let mut tensor_index_bytes: Vec<u8> = Vec::new();
        let mut tensor_data_bytes: Vec<u8> = Vec::new();
        for (name, dtype, shape, data) in &tensors {
            // Pad so every tensor starts on an ALIGNMENT boundary within the
            // data section; recorded offsets are relative to that section.
            let padding = (ALIGNMENT - (tensor_data_bytes.len() % ALIGNMENT)) % ALIGNMENT;
            tensor_data_bytes.extend(std::iter::repeat_n(0u8, padding));
            let offset = tensor_data_bytes.len() as u64;
            let size = data.len() as u64;
            tensor_index_bytes.extend(write_apr_tensor_entry(name, dtype, shape, offset, size));
            tensor_data_bytes.extend_from_slice(data);
        }
        let metadata_offset = HEADER_SIZE as u64;
        let tensor_index_offset = metadata_offset + metadata_padded_len as u64;
        let data_offset = tensor_index_offset + tensor_index_bytes.len() as u64;
        let mut header = vec![0u8; HEADER_SIZE];
        header[0..4].copy_from_slice(&MAGIC);
        header[4] = 2; // version major
        header[5] = 0; // version minor
        header[6..8].copy_from_slice(&0u16.to_le_bytes()); // reserved/flags
        header[8..12].copy_from_slice(&(tensors.len() as u32).to_le_bytes());
        header[12..20].copy_from_slice(&metadata_offset.to_le_bytes());
        header[20..24].copy_from_slice(&(metadata_bytes.len() as u32).to_le_bytes());
        header[24..32].copy_from_slice(&tensor_index_offset.to_le_bytes());
        header[32..40].copy_from_slice(&data_offset.to_le_bytes());
let mut result = Vec::with_capacity(total_size);
result.extend_from_slice(&header);
result.extend_from_slice(&metadata_bytes);
result.resize(HEADER_SIZE + metadata_padded_len, 0); result.extend_from_slice(&tensor_index_bytes);
result.extend_from_slice(&tensor_data_bytes);
Ok(result)
}
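    // A minimal usage sketch (hypothetical caller; the output path is
    // illustrative):
    //
    //     let bytes = model.to_apr_bytes()?;
    //     std::fs::write("model.apr", &bytes)?;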
    /// Flattens the model into `(name, dtype, shape, raw_bytes)` tuples using
    /// GGUF-style tensor names: token embedding, per-layer tensors, output
    /// norm, then the LM head.
    #[allow(clippy::cast_possible_truncation)]
    fn collect_apr_model_tensors(&self) -> Vec<(String, String, Vec<usize>, Vec<u8>)> {
        let mut tensors = Vec::new();
        let embed_bytes: Vec<u8> = self
            .token_embedding
            .iter()
            .flat_map(|f| f.to_le_bytes())
            .collect();
        tensors.push((
            "token_embd.weight".to_string(),
            "F32".to_string(),
            vec![self.config.vocab_size, self.config.hidden_dim],
            embed_bytes,
        ));
        let head_dim = self.config.head_dim();
        let kv_dim = self.config.num_kv_heads * head_dim;
        for (layer_idx, layer) in self.layers.iter().enumerate() {
            self.collect_apr_layer_tensors(&mut tensors, layer_idx, layer, kv_dim);
        }
        let output_norm_bytes: Vec<u8> = self
            .output_norm_weight
            .iter()
            .flat_map(|f| f.to_le_bytes())
            .collect();
        tensors.push((
            "output_norm.weight".to_string(),
            "F32".to_string(),
            vec![self.config.hidden_dim],
            output_norm_bytes,
        ));
        tensors.push((
            "output.weight".to_string(),
            apr_qtype_to_dtype(self.lm_head_weight.qtype).to_string(),
            vec![self.config.vocab_size, self.config.hidden_dim],
            self.lm_head_weight.data.clone(),
        ));
        tensors
    }
    /// Appends one transformer block's tensors: attention norm, Q/K/V (or
    /// fused QKV), attention output, and the FFN norm/gate/up/down weights.
    fn collect_apr_layer_tensors(
        &self,
        tensors: &mut Vec<(String, String, Vec<usize>, Vec<u8>)>,
        layer_idx: usize,
        layer: &OwnedQuantizedLayer,
        kv_dim: usize,
    ) {
        let norm_bytes: Vec<u8> = layer
            .attn_norm_weight
            .iter()
            .flat_map(|f| f.to_le_bytes())
            .collect();
        tensors.push((
            format!("blk.{layer_idx}.attn_norm.weight"),
            "F32".to_string(),
            vec![self.config.hidden_dim],
            norm_bytes,
        ));
        match &layer.qkv_weight {
            OwnedQKVWeights::Separate { q, k, v } => {
                tensors.push((
                    format!("blk.{layer_idx}.attn_q.weight"),
                    apr_qtype_to_dtype(q.qtype).to_string(),
                    vec![self.config.hidden_dim, self.config.hidden_dim],
                    q.data.clone(),
                ));
                tensors.push((
                    format!("blk.{layer_idx}.attn_k.weight"),
                    apr_qtype_to_dtype(k.qtype).to_string(),
                    vec![kv_dim, self.config.hidden_dim],
                    k.data.clone(),
                ));
                tensors.push((
                    format!("blk.{layer_idx}.attn_v.weight"),
                    apr_qtype_to_dtype(v.qtype).to_string(),
                    vec![kv_dim, self.config.hidden_dim],
                    v.data.clone(),
                ));
            }
            OwnedQKVWeights::Fused(t) => {
                tensors.push((
                    format!("blk.{layer_idx}.attn_qkv.weight"),
                    apr_qtype_to_dtype(t.qtype).to_string(),
                    vec![t.out_dim, t.in_dim],
                    t.data.clone(),
                ));
            }
        }
        tensors.push((
            format!("blk.{layer_idx}.attn_output.weight"),
            apr_qtype_to_dtype(layer.attn_output_weight.qtype).to_string(),
            vec![self.config.hidden_dim, self.config.hidden_dim],
            layer.attn_output_weight.data.clone(),
        ));
        if let Some(ref ffn_norm) = layer.ffn_norm_weight {
            let norm_bytes: Vec<u8> = ffn_norm.iter().flat_map(|f| f.to_le_bytes()).collect();
            tensors.push((
                format!("blk.{layer_idx}.ffn_norm.weight"),
                "F32".to_string(),
                vec![self.config.hidden_dim],
                norm_bytes,
            ));
        }
        if let Some(ref gate) = layer.ffn_gate_weight {
            tensors.push((
                format!("blk.{layer_idx}.ffn_gate.weight"),
                apr_qtype_to_dtype(gate.qtype).to_string(),
                vec![self.config.intermediate_dim, self.config.hidden_dim],
                gate.data.clone(),
            ));
        }
        tensors.push((
            format!("blk.{layer_idx}.ffn_up.weight"),
            apr_qtype_to_dtype(layer.ffn_up_weight.qtype).to_string(),
            vec![self.config.intermediate_dim, self.config.hidden_dim],
            layer.ffn_up_weight.data.clone(),
        ));
        tensors.push((
            format!("blk.{layer_idx}.ffn_down.weight"),
            apr_qtype_to_dtype(layer.ffn_down_weight.qtype).to_string(),
            vec![self.config.hidden_dim, self.config.intermediate_dim],
            layer.ffn_down_weight.data.clone(),
        ));
    }
}
include!("embedding.rs");
include!("loader_apr_quantized.rs");