impl ModelFixture {
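    /// Builds a synthetic APR-format model entirely in memory: a 64-byte
    /// header, a JSON metadata blob, a 64-byte-aligned tensor index, and a
    /// zero-filled tensor data section sized from `config`.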
    fn generate_apr_data(config: &ModelConfig) -> Vec<u8> {
        // (name, shape, dtype tag); the sizing loop below treats tag 0 as
        // 4-byte elements and tag 1 as 2-byte elements.
        let mut tensor_defs: Vec<(&str, Vec<usize>, u8)> = Vec::new();
        // K/V projection width; narrower than hidden_dim under grouped-query
        // attention (num_kv_heads < num_heads).
        let head_dim = config.hidden_dim / config.num_heads;
        let kv_dim = config.num_kv_heads * head_dim;
        tensor_defs.push((
            "model.embed_tokens.weight",
            vec![config.vocab_size, config.hidden_dim],
            0,
        ));
tensor_defs.push(("model.norm.weight", vec![config.hidden_dim], 0));
        tensor_defs.push((
            "lm_head.weight",
            vec![config.vocab_size, config.hidden_dim],
            0,
        ));
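        // Per-layer weights use LLaMA-style names (model.layers.{i}.*), one
        // entry per norm, attention projection, and MLP projection.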
        for i in 0..config.num_layers {
            let prefix = format!("model.layers.{i}");
            // Box::leak turns each formatted name into a &'static str so it
            // fits Vec<(&str, _, _)>; the leak is acceptable in a test fixture.
            tensor_defs.push((
                Box::leak(format!("{prefix}.input_layernorm.weight").into_boxed_str()),
                vec![config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.self_attn.q_proj.weight").into_boxed_str()),
                vec![config.hidden_dim, config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.self_attn.k_proj.weight").into_boxed_str()),
                vec![kv_dim, config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.self_attn.v_proj.weight").into_boxed_str()),
                vec![kv_dim, config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.self_attn.o_proj.weight").into_boxed_str()),
                vec![config.hidden_dim, config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.post_attention_layernorm.weight").into_boxed_str()),
                vec![config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.mlp.gate_proj.weight").into_boxed_str()),
                vec![config.intermediate_dim, config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.mlp.up_proj.weight").into_boxed_str()),
                vec![config.intermediate_dim, config.hidden_dim],
                0,
            ));
            tensor_defs.push((
                Box::leak(format!("{prefix}.mlp.down_proj.weight").into_boxed_str()),
                vec![config.hidden_dim, config.intermediate_dim],
                0,
            ));
        }
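        // Lay out tensor payloads: record each tensor's offset and byte size,
        // aligning the next tensor to a 64-byte boundary.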
        let tensor_count = tensor_defs.len();
        let mut tensor_data_offsets: Vec<u64> = Vec::new();
        let mut tensor_sizes: Vec<u64> = Vec::new();
        let mut current_offset: u64 = 0;
        for (_, shape, dtype) in &tensor_defs {
            let element_count: usize = shape.iter().product();
            let bytes_per_element = match dtype {
                0 => 4, // f32
                1 => 2, // f16
                _ => 4,
            };
            let size = (element_count * bytes_per_element) as u64;
            tensor_data_offsets.push(current_offset);
            tensor_sizes.push(size);
            current_offset += size;
            current_offset = current_offset.div_ceil(64) * 64;
        }
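        // Fixed-size header. The metadata-size, tensor-index-offset, and
        // data-offset fields are written as zero here and patched below once
        // their final values are known.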
        let total_tensor_data_size = current_offset;
        let mut data = Vec::new();
        data.extend_from_slice(b"APR\x00"); // magic
        data.push(2); // format version bytes
        data.push(0);
        data.extend_from_slice(&0u16.to_le_bytes());
        data.extend_from_slice(&(tensor_count as u32).to_le_bytes());
        data.extend_from_slice(&64u64.to_le_bytes());
        let metadata_size_offset = data.len();
        data.extend_from_slice(&0u32.to_le_bytes());
        let tensor_index_offset_pos = data.len();
        data.extend_from_slice(&0u64.to_le_bytes());
        let data_offset_pos = data.len();
        data.extend_from_slice(&0u64.to_le_bytes());
        data.extend_from_slice(&0u32.to_le_bytes());
        data.resize(64, 0); // pad the header to its full 64 bytes
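        // Minimal JSON metadata mirroring the fields of ModelConfig.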
        let metadata = format!(
            r#"{{"architecture":"{}","hidden_size":{},"num_layers":{},"num_heads":{},"num_kv_heads":{},"vocab_size":{},"intermediate_size":{},"rope_theta":{},"rms_norm_eps":{}}}"#,
            config.architecture,
            config.hidden_dim,
            config.num_layers,
            config.num_heads,
            config.num_kv_heads,
            config.vocab_size,
            config.intermediate_dim,
            config.rope_theta,
            config.eps
        );
        let metadata_bytes = metadata.as_bytes();
        let metadata_size = metadata_bytes.len() as u32;
        // Patch the metadata size into the header, append the JSON, and pad
        // to a 64-byte boundary before the tensor index begins.
        data[metadata_size_offset..metadata_size_offset + 4]
            .copy_from_slice(&metadata_size.to_le_bytes());
        data.extend_from_slice(metadata_bytes);
        let padded_len = data.len().div_ceil(64) * 64;
        data.resize(padded_len, 0);
        let tensor_index_offset = data.len() as u64;
        data[tensor_index_offset_pos..tensor_index_offset_pos + 8]
            .copy_from_slice(&tensor_index_offset.to_le_bytes());
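        // One index entry per tensor: u16 name length, name bytes, dtype tag,
        // rank, u64 dims, then the tensor's data-section offset and byte size.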
        for (i, (name, shape, dtype)) in tensor_defs.iter().enumerate() {
            let name_bytes = name.as_bytes();
            data.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
            data.extend_from_slice(name_bytes);
            data.push(*dtype);
            data.push(shape.len() as u8);
            for &dim in shape {
                data.extend_from_slice(&(dim as u64).to_le_bytes());
            }
            data.extend_from_slice(&tensor_data_offsets[i].to_le_bytes());
            data.extend_from_slice(&tensor_sizes[i].to_le_bytes());
        }
        // Pad once more, patch the data-section offset into the header, and
        // append a zero-filled tensor data section (zero bytes decode as 0.0
        // for the f32 tensors declared above).
        let padded_len = data.len().div_ceil(64) * 64;
        data.resize(padded_len, 0);
        let data_offset = data.len() as u64;
        data[data_offset_pos..data_offset_pos + 8].copy_from_slice(&data_offset.to_le_bytes());
        data.resize(data.len() + total_tensor_data_size as usize, 0);
        data
    }
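    /// Builds zero-valued Q4_K-style data for an `out_dim x in_dim` tensor.
    /// The layout follows GGML's Q4_K super-block: 256 elements in 144 bytes
    /// (f16 `d`, f16 `dmin`, 12 bytes of sub-block scales, 128 bytes of
    /// 4-bit quants). Only `d` is set (to 1.0) so the scale is a valid,
    /// finite half-float; all quants stay zero.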
    fn create_q4k_data(in_dim: usize, out_dim: usize) -> Vec<u8> {
        // 256 elements per super-block, 144 bytes per super-block.
        let super_blocks_per_row = in_dim.div_ceil(256);
        let bytes_per_row = super_blocks_per_row * 144;
        let data_size = out_dim * bytes_per_row;
        let mut data = vec![0u8; data_size];
        for row in 0..out_dim {
            for sb in 0..super_blocks_per_row {
                let offset = row * bytes_per_row + sb * 144;
                if offset + 4 <= data.len() {
                    // d = 1.0 (f16 0x3C00), dmin = 0.0 at the head of each
                    // super-block.
                    data[offset..offset + 2].copy_from_slice(&0x3C00_u16.to_le_bytes());
                    data[offset + 2..offset + 4].copy_from_slice(&0x0000_u16.to_le_bytes());
                }
            }
        }
        data
    }
}
include!("mod_gguf_try_model.rs");