/// Synthetic Q4_0 payload covering `num_elements` values: one 18-byte block
/// per 32 elements (2-byte little-endian f16 scale of 0.1 followed by
/// 16 nibble bytes of 0x88).
#[must_use]
pub fn create_q4_0_data(num_elements: usize) -> Vec<u8> {
    let block_count = num_elements.div_ceil(32);
    // Scale bytes are identical for every block; encode once up front.
    let scale_le = half::f16::from_f32(0.1).to_le_bytes();
    let mut bytes = Vec::with_capacity(block_count * 18);
    for _ in 0..block_count {
        bytes.extend_from_slice(&scale_le);
        bytes.extend_from_slice(&[0x88u8; 16]);
    }
    bytes
}
/// Synthetic Q8_0 payload covering `num_elements` values: one 34-byte block
/// per 32 elements (2-byte little-endian f16 scale of 0.1 followed by
/// 32 zero quant bytes).
#[must_use]
pub fn create_q8_0_data(num_elements: usize) -> Vec<u8> {
    let block_count = num_elements.div_ceil(32);
    // Scale bytes are identical for every block; encode once up front.
    let scale_le = half::f16::from_f32(0.1).to_le_bytes();
    let mut bytes = Vec::with_capacity(block_count * 34);
    for _ in 0..block_count {
        bytes.extend_from_slice(&scale_le);
        bytes.extend_from_slice(&[0u8; 32]);
    }
    bytes
}
/// All-zero Q4_K payload covering `num_elements` values: one 144-byte
/// super-block per 256 elements (rounded up).
#[must_use]
pub fn create_q4_k_data(num_elements: usize) -> Vec<u8> {
    const QK_K: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 144;
    vec![0u8; num_elements.div_ceil(QK_K) * SUPER_BLOCK_BYTES]
}
/// All-zero Q4_K payload for a `rows` x `cols` matrix: each row is padded
/// up to whole 256-element super-blocks of 144 bytes.
#[must_use]
pub fn create_q4_k_data_2d(rows: usize, cols: usize) -> Vec<u8> {
    const QK_K: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 144;
    let row_bytes = cols.div_ceil(QK_K) * SUPER_BLOCK_BYTES;
    vec![0u8; rows * row_bytes]
}
/// All-zero Q5_K payload covering `num_elements` values: one 176-byte
/// super-block per 256 elements (rounded up).
#[must_use]
pub fn create_q5_k_data(num_elements: usize) -> Vec<u8> {
    const QK_K: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 176;
    vec![0u8; num_elements.div_ceil(QK_K) * SUPER_BLOCK_BYTES]
}
/// All-zero Q6_K payload covering `num_elements` values: one 210-byte
/// super-block per 256 elements (rounded up).
#[must_use]
pub fn create_q6_k_data(num_elements: usize) -> Vec<u8> {
    const QK_K: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 210;
    vec![0u8; num_elements.div_ceil(QK_K) * SUPER_BLOCK_BYTES]
}
/// All-zero Q2_K payload covering `num_elements` values: one 84-byte
/// super-block per 256 elements (rounded up).
#[must_use]
pub fn create_q2_k_data(num_elements: usize) -> Vec<u8> {
    const QK_K: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 84;
    vec![0u8; num_elements.div_ceil(QK_K) * SUPER_BLOCK_BYTES]
}
/// Little-endian f16 buffer: element `i` encodes `f16(i * 0.01)`, 2 bytes
/// per element, `num_elements * 2` bytes total.
#[must_use]
pub fn create_f16_data(num_elements: usize) -> Vec<u8> {
    (0..num_elements)
        .flat_map(|i| half::f16::from_f32((i as f32) * 0.01).to_le_bytes())
        .collect()
}
/// Synthetic Q4_1 payload covering `num_elements` values: one 20-byte block
/// per 32 elements (f16 scale 0.1, f16 min 0.0, then 16 nibble bytes of 0x88).
#[must_use]
pub fn create_q4_1_data(num_elements: usize) -> Vec<u8> {
    let block_count = num_elements.div_ceil(32);
    // Both header fields are constant across blocks; encode once up front.
    let scale_le = half::f16::from_f32(0.1).to_le_bytes();
    let min_le = half::f16::from_f32(0.0).to_le_bytes();
    let mut bytes = Vec::with_capacity(block_count * 20);
    for _ in 0..block_count {
        bytes.extend_from_slice(&scale_le);
        bytes.extend_from_slice(&min_le);
        bytes.extend_from_slice(&[0x88u8; 16]);
    }
    bytes
}
/// Synthetic Q5_0 payload covering `num_elements` values: one 22-byte block
/// per 32 elements (2-byte f16 scale of 0.1, 4 zero bytes, 16 nibble bytes
/// of 0x88).
#[must_use]
pub fn create_q5_0_data(num_elements: usize) -> Vec<u8> {
    let block_count = num_elements.div_ceil(32);
    let scale_le = half::f16::from_f32(0.1).to_le_bytes();
    let mut bytes = Vec::with_capacity(block_count * 22);
    for _ in 0..block_count {
        bytes.extend_from_slice(&scale_le);
        bytes.extend_from_slice(&[0u8; 4]);
        bytes.extend_from_slice(&[0x88u8; 16]);
    }
    bytes
}
/// Synthetic Q5_1 payload covering `num_elements` values: one 24-byte block
/// per 32 elements (f16 scale 0.1, f16 min 0.0, 4 zero bytes, 16 nibble
/// bytes of 0x88).
#[must_use]
pub fn create_q5_1_data(num_elements: usize) -> Vec<u8> {
    let block_count = num_elements.div_ceil(32);
    // Both header fields are constant across blocks; encode once up front.
    let scale_le = half::f16::from_f32(0.1).to_le_bytes();
    let min_le = half::f16::from_f32(0.0).to_le_bytes();
    let mut bytes = Vec::with_capacity(block_count * 24);
    for _ in 0..block_count {
        bytes.extend_from_slice(&scale_le);
        bytes.extend_from_slice(&min_le);
        bytes.extend_from_slice(&[0u8; 4]);
        bytes.extend_from_slice(&[0x88u8; 16]);
    }
    bytes
}
/// Deterministic f32 embedding table of `vocab_size * hidden_dim` entries.
/// Values follow `((i % 1000) - 500) / 5000`, i.e. a small ramp cycling
/// every 1000 entries within roughly [-0.1, 0.0998].
#[must_use]
pub fn create_f32_embedding_data(vocab_size: usize, hidden_dim: usize) -> Vec<f32> {
    (0..vocab_size * hidden_dim)
        .map(|i| ((i % 1000) as f32 - 500.0) / 5000.0)
        .collect()
}
/// Identity RMS/layer-norm weights: `dim` f32 values, all 1.0.
#[must_use]
pub fn create_f32_norm_weights(dim: usize) -> Vec<f32> {
    std::iter::repeat(1.0f32).take(dim).collect()
}
/// Serializes a minimal single-layer "llama"-architecture GGUF model in
/// memory and returns the raw file bytes.
///
/// Token embeddings and all norm weights are F32 (norms are identity, 1.0);
/// every layer-0 projection matrix (attention q/k/v/output, FFN
/// up/down/gate) is a zero-filled Q4_K payload sized via
/// `create_q4_k_data_2d`. Unlike `build_executable_pygmy_gguf`, no
/// `output.weight` (lm-head) tensor is added.
///
/// NOTE(review): tensor dims are declared as `[d0, d1]` in the same
/// (rows, cols) order used to size each payload, so byte counts line up;
/// whether `GGUFBuilder` interprets GGUF dimension ordering the same way
/// cannot be confirmed from this file — verify against the builder.
#[must_use]
pub fn build_minimal_llama_gguf(
    vocab_size: usize,
    hidden_dim: usize,
    intermediate_dim: usize,
    num_heads: usize,
    num_kv_heads: usize,
) -> Vec<u8> {
    // Grouped-query attention sizing: K/V projections span num_kv_heads
    // heads of head_dim each. Assumes num_heads divides hidden_dim evenly.
    let head_dim = hidden_dim / num_heads;
    let kv_dim = num_kv_heads * head_dim;
    // Pre-build every tensor payload; sizes must match the dims passed to
    // the builder calls below.
    let embed_data = create_f32_embedding_data(vocab_size, hidden_dim);
    // Reused for attn_norm, ffn_norm, and output_norm (all identity).
    let norm_data = create_f32_norm_weights(hidden_dim);
    let q_data = create_q4_k_data_2d(hidden_dim, hidden_dim);
    let k_data = create_q4_k_data_2d(hidden_dim, kv_dim);
    let v_data = create_q4_k_data_2d(hidden_dim, kv_dim);
    let attn_out_data = create_q4_k_data_2d(hidden_dim, hidden_dim);
    let ffn_up_data = create_q4_k_data_2d(hidden_dim, intermediate_dim);
    let ffn_down_data = create_q4_k_data_2d(intermediate_dim, hidden_dim);
    let ffn_gate_data = create_q4_k_data_2d(hidden_dim, intermediate_dim);
    // Fixed metadata: single layer, 256-token context, standard RoPE base.
    GGUFBuilder::new()
        .architecture("llama")
        .hidden_dim("llama", hidden_dim as u32)
        .num_layers("llama", 1)
        .num_heads("llama", num_heads as u32)
        .num_kv_heads("llama", num_kv_heads as u32)
        .context_length("llama", 256)
        .rope_freq_base("llama", 10000.0)
        .rms_epsilon("llama", 1e-5)
        .ffn_hidden_dim("llama", intermediate_dim as u32)
        .add_f32_tensor(
            "token_embd.weight",
            &[vocab_size as u64, hidden_dim as u64],
            &embed_data,
        )
        .add_f32_tensor("blk.0.attn_norm.weight", &[hidden_dim as u64], &norm_data)
        .add_q4_k_tensor(
            "blk.0.attn_q.weight",
            &[hidden_dim as u64, hidden_dim as u64],
            &q_data,
        )
        .add_q4_k_tensor(
            "blk.0.attn_k.weight",
            &[hidden_dim as u64, kv_dim as u64],
            &k_data,
        )
        .add_q4_k_tensor(
            "blk.0.attn_v.weight",
            &[hidden_dim as u64, kv_dim as u64],
            &v_data,
        )
        .add_q4_k_tensor(
            "blk.0.attn_output.weight",
            &[hidden_dim as u64, hidden_dim as u64],
            &attn_out_data,
        )
        .add_f32_tensor("blk.0.ffn_norm.weight", &[hidden_dim as u64], &norm_data)
        .add_q4_k_tensor(
            "blk.0.ffn_up.weight",
            &[hidden_dim as u64, intermediate_dim as u64],
            &ffn_up_data,
        )
        .add_q4_k_tensor(
            "blk.0.ffn_down.weight",
            &[intermediate_dim as u64, hidden_dim as u64],
            &ffn_down_data,
        )
        .add_q4_k_tensor(
            "blk.0.ffn_gate.weight",
            &[hidden_dim as u64, intermediate_dim as u64],
            &ffn_gate_data,
        )
        .add_f32_tensor("output_norm.weight", &[hidden_dim as u64], &norm_data)
        .build()
}
/// Builds a tiny ("pygmy") single-layer llama-architecture GGUF file with
/// all dimensions fixed at compile time and returns the raw file bytes.
///
/// Differences from `build_minimal_llama_gguf`: projection weights use
/// Q4_0 (not Q4_K), and an explicit `output.weight` lm-head tensor is
/// included, so the resulting model has a complete forward path from
/// token ids to logits.
#[must_use]
pub fn build_executable_pygmy_gguf() -> Vec<u8> {
    // Deliberately tiny dims: 4 heads of width 8 over a 32-wide hidden state.
    const VOCAB_SIZE: usize = 32;
    const HIDDEN_DIM: usize = 32;
    const INTERMEDIATE_DIM: usize = 64;
    const NUM_HEADS: usize = 4;
    const NUM_KV_HEADS: usize = 4;
    const CONTEXT_LENGTH: usize = 32;
    // NUM_KV_HEADS == NUM_HEADS here, so kv_dim == HIDDEN_DIM (no GQA shrink).
    let kv_dim = NUM_KV_HEADS * (HIDDEN_DIM / NUM_HEADS);
    // Deterministic small ramp in [-0.05, 0.049], cycling every 100 entries.
    let embed_data: Vec<f32> = (0..VOCAB_SIZE * HIDDEN_DIM)
        .map(|i| ((i % 100) as f32 - 50.0) / 1000.0)
        .collect();
    // Identity norm weights, reused for attn/ffn/output norms.
    let norm_data: Vec<f32> = vec![1.0; HIDDEN_DIM];
    // Q4_0 payloads sized to match the tensor dims declared below.
    let q_data = create_q4_0_data(HIDDEN_DIM * HIDDEN_DIM);
    let k_data = create_q4_0_data(HIDDEN_DIM * kv_dim);
    let v_data = create_q4_0_data(HIDDEN_DIM * kv_dim);
    let attn_out_data = create_q4_0_data(HIDDEN_DIM * HIDDEN_DIM);
    let ffn_gate_data = create_q4_0_data(HIDDEN_DIM * INTERMEDIATE_DIM);
    let ffn_up_data = create_q4_0_data(HIDDEN_DIM * INTERMEDIATE_DIM);
    let ffn_down_data = create_q4_0_data(INTERMEDIATE_DIM * HIDDEN_DIM);
    // Explicit lm-head (not tied to token_embd).
    let lm_head_data = create_q4_0_data(HIDDEN_DIM * VOCAB_SIZE);
    GGUFBuilder::new()
        .architecture("llama")
        .hidden_dim("llama", HIDDEN_DIM as u32)
        .num_layers("llama", 1)
        .num_heads("llama", NUM_HEADS as u32)
        .num_kv_heads("llama", NUM_KV_HEADS as u32)
        .context_length("llama", CONTEXT_LENGTH as u32)
        .rope_freq_base("llama", 10000.0)
        .rms_epsilon("llama", 1e-5)
        .ffn_hidden_dim("llama", INTERMEDIATE_DIM as u32)
        .add_f32_tensor(
            "token_embd.weight",
            &[VOCAB_SIZE as u64, HIDDEN_DIM as u64],
            &embed_data,
        )
        .add_f32_tensor("blk.0.attn_norm.weight", &[HIDDEN_DIM as u64], &norm_data)
        .add_q4_0_tensor(
            "blk.0.attn_q.weight",
            &[HIDDEN_DIM as u64, HIDDEN_DIM as u64],
            &q_data,
        )
        .add_q4_0_tensor(
            "blk.0.attn_k.weight",
            &[HIDDEN_DIM as u64, kv_dim as u64],
            &k_data,
        )
        .add_q4_0_tensor(
            "blk.0.attn_v.weight",
            &[HIDDEN_DIM as u64, kv_dim as u64],
            &v_data,
        )
        .add_q4_0_tensor(
            "blk.0.attn_output.weight",
            &[HIDDEN_DIM as u64, HIDDEN_DIM as u64],
            &attn_out_data,
        )
        .add_f32_tensor("blk.0.ffn_norm.weight", &[HIDDEN_DIM as u64], &norm_data)
        .add_q4_0_tensor(
            "blk.0.ffn_gate.weight",
            &[HIDDEN_DIM as u64, INTERMEDIATE_DIM as u64],
            &ffn_gate_data,
        )
        .add_q4_0_tensor(
            "blk.0.ffn_up.weight",
            &[HIDDEN_DIM as u64, INTERMEDIATE_DIM as u64],
            &ffn_up_data,
        )
        .add_q4_0_tensor(
            "blk.0.ffn_down.weight",
            &[INTERMEDIATE_DIM as u64, HIDDEN_DIM as u64],
            &ffn_down_data,
        )
        .add_f32_tensor("output_norm.weight", &[HIDDEN_DIM as u64], &norm_data)
        .add_q4_0_tensor(
            "output.weight",
            &[HIDDEN_DIM as u64, VOCAB_SIZE as u64],
            &lm_head_data,
        )
        .build()
}