/// Builds an in-memory GGUF byte buffer describing a minimal, single-layer
/// phi2-style model (presumably a fixture for loader/parser tests — the
/// weights come from synthetic data generators, not a real checkpoint).
///
/// The tensor layout follows phi2 naming conventions: a fused QKV projection
/// (`blk.0.attn_qkv.weight` of shape `[hidden, 3*hidden]`), separate
/// `ffn_up`/`ffn_down` projections, and f32 norm weights. The same norm
/// vector is reused for every norm tensor since only structure matters here.
///
/// # Arguments
/// * `vocab_size` - number of rows in the token embedding table
/// * `hidden_dim` - model/embedding width
/// * `intermediate_dim` - FFN inner width
/// * `num_heads` - attention head count (kv heads are set equal, i.e. full MHA)
#[must_use]
pub fn build_minimal_phi2_gguf(
    vocab_size: usize,
    hidden_dim: usize,
    intermediate_dim: usize,
    num_heads: usize,
) -> Vec<u8> {
    // phi2 fuses Q, K and V into one projection of output width 3*hidden.
    let qkv_out_dim = 3 * hidden_dim;

    let embed_data = create_f32_embedding_data(vocab_size, hidden_dim);
    let norm_data = create_f32_norm_weights(hidden_dim);
    let qkv_data = create_q4_k_data_2d(hidden_dim, qkv_out_dim);
    let attn_out_data = create_q4_k_data_2d(hidden_dim, hidden_dim);
    let ffn_up_data = create_q4_k_data_2d(hidden_dim, intermediate_dim);
    let ffn_down_data = create_q4_k_data_2d(intermediate_dim, hidden_dim);

    // Hoist the u64 conversions used by every tensor-shape slice below.
    let vocab = vocab_size as u64;
    let hidden = hidden_dim as u64;
    let inter = intermediate_dim as u64;
    let qkv_out = qkv_out_dim as u64;

    GGUFBuilder::new()
        .architecture("phi2")
        .hidden_dim("phi2", hidden_dim as u32)
        .num_layers("phi2", 1)
        .num_heads("phi2", num_heads as u32)
        // Full multi-head attention: kv heads == query heads.
        .num_kv_heads("phi2", num_heads as u32)
        .context_length("phi2", 256)
        .rope_freq_base("phi2", 10000.0)
        .rms_epsilon("phi2", 1e-5)
        .add_f32_tensor("token_embd.weight", &[vocab, hidden], &embed_data)
        .add_f32_tensor("blk.0.attn_norm.weight", &[hidden], &norm_data)
        .add_q4_k_tensor("blk.0.attn_qkv.weight", &[hidden, qkv_out], &qkv_data)
        .add_q4_k_tensor(
            "blk.0.attn_output.weight",
            &[hidden, hidden],
            &attn_out_data,
        )
        .add_f32_tensor("blk.0.ffn_norm.weight", &[hidden], &norm_data)
        .add_q4_k_tensor("blk.0.ffn_up.weight", &[hidden, inter], &ffn_up_data)
        .add_q4_k_tensor("blk.0.ffn_down.weight", &[inter, hidden], &ffn_down_data)
        .add_f32_tensor("output_norm.weight", &[hidden], &norm_data)
        .build()
}