use std::io::Write;
use std::path::PathBuf;
use tempfile::TempDir;
/// Magic number at the start of the GGUF image: the ASCII bytes `GGUF` read as a
/// little-endian `u32`.
const GGUF_MAGIC: u32 = u32::from_le_bytes(*b"GGUF");
/// Format version written right after the magic number.
const GGUF_VERSION_V3: u32 = 3;
/// Alignment, in bytes, used when padding the serialized GGUF image.
const GGUF_ALIGNMENT: usize = 32;
/// Tensor type tag recorded for tensors added as raw `f32` values.
const GGUF_TYPE_F32: u32 = 0;
/// Tensor type tag recorded for tensors added as Q4_0 block data.
const GGUF_TYPE_Q4_0: u32 = 2;
/// Fluent builder that assembles a GGUF file image in memory.
///
/// Metadata entries and tensors are collected by the chained `add_*` methods and
/// serialized, in insertion order, by `build`.
pub struct GgufBuilder {
/// Metadata entries as `(key, value-type tag, already-encoded value bytes)`.
metadata: Vec<(String, u32, Vec<u8>)>,
/// Tensors as `(name, dims as passed by the caller, tensor type tag, raw data bytes)`.
tensors: Vec<(String, Vec<u64>, u32, Vec<u8>)>,
}
impl Default for GgufBuilder {
fn default() -> Self {
Self::new()
}
}
impl GgufBuilder {
    /// GGUF metadata value-type tag for `u32` values.
    const META_TYPE_U32: u32 = 4;
    /// GGUF metadata value-type tag for `f32` values.
    const META_TYPE_F32: u32 = 6;
    /// GGUF metadata value-type tag for length-prefixed strings.
    const META_TYPE_STRING: u32 = 8;

    /// Creates a builder with no metadata entries and no tensors.
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: Vec::new(),
            tensors: Vec::new(),
        }
    }

    /// Adds a string metadata entry.
    ///
    /// The value is encoded as a little-endian `u64` byte length followed by the
    /// string's UTF-8 bytes.
    #[must_use]
    pub fn add_string(mut self, key: &str, value: &str) -> Self {
        let mut bytes = Vec::with_capacity(8 + value.len());
        Self::write_string(&mut bytes, value);
        self.metadata
            .push((key.to_string(), Self::META_TYPE_STRING, bytes));
        self
    }

    /// Adds a `u32` metadata entry, encoded little-endian.
    #[must_use]
    pub fn add_u32(mut self, key: &str, value: u32) -> Self {
        self.metadata.push((
            key.to_string(),
            Self::META_TYPE_U32,
            value.to_le_bytes().to_vec(),
        ));
        self
    }

    /// Adds an `f32` metadata entry, encoded little-endian.
    #[must_use]
    pub fn add_f32(mut self, key: &str, value: f32) -> Self {
        self.metadata.push((
            key.to_string(),
            Self::META_TYPE_F32,
            value.to_le_bytes().to_vec(),
        ));
        self
    }

    /// Sets the `general.architecture` string.
    #[must_use]
    pub fn architecture(self, arch: &str) -> Self {
        self.add_string("general.architecture", arch)
    }

    /// Sets `{arch}.embedding_length`.
    #[must_use]
    pub fn hidden_dim(self, arch: &str, dim: u32) -> Self {
        self.add_u32(&format!("{arch}.embedding_length"), dim)
    }

    /// Sets `{arch}.block_count`.
    #[must_use]
    pub fn num_layers(self, arch: &str, count: u32) -> Self {
        self.add_u32(&format!("{arch}.block_count"), count)
    }

    /// Sets `{arch}.attention.head_count`.
    #[must_use]
    pub fn num_heads(self, arch: &str, count: u32) -> Self {
        self.add_u32(&format!("{arch}.attention.head_count"), count)
    }

    /// Sets `{arch}.attention.head_count_kv`.
    #[must_use]
    pub fn num_kv_heads(self, arch: &str, count: u32) -> Self {
        self.add_u32(&format!("{arch}.attention.head_count_kv"), count)
    }

    /// Sets `{arch}.context_length`.
    #[must_use]
    pub fn context_length(self, arch: &str, len: u32) -> Self {
        self.add_u32(&format!("{arch}.context_length"), len)
    }

    /// Sets `{arch}.rope.freq_base`.
    #[must_use]
    pub fn rope_freq_base(self, arch: &str, base: f32) -> Self {
        self.add_f32(&format!("{arch}.rope.freq_base"), base)
    }

    /// Sets `{arch}.attention.layer_norm_rms_epsilon`.
    #[must_use]
    pub fn rms_epsilon(self, arch: &str, eps: f32) -> Self {
        self.add_f32(&format!("{arch}.attention.layer_norm_rms_epsilon"), eps)
    }

    /// Sets `{arch}.feed_forward_length`.
    #[must_use]
    pub fn ffn_hidden_dim(self, arch: &str, dim: u32) -> Self {
        self.add_u32(&format!("{arch}.feed_forward_length"), dim)
    }

    /// Adds a tensor whose payload is `data` encoded as little-endian `f32` values.
    ///
    /// `dims` is stored as given and written in reverse order by [`Self::build`].
    /// The element count of `data` is not checked against `dims`.
    #[must_use]
    pub fn add_f32_tensor(mut self, name: &str, dims: &[u64], data: &[f32]) -> Self {
        let bytes: Vec<u8> = data.iter().flat_map(|f| f.to_le_bytes()).collect();
        self.tensors
            .push((name.to_string(), dims.to_vec(), GGUF_TYPE_F32, bytes));
        self
    }

    /// Adds a tensor tagged as Q4_0 whose payload is `data`, copied verbatim.
    ///
    /// `dims` is stored as given and written in reverse order by [`Self::build`].
    /// The length of `data` is not checked against `dims`.
    #[must_use]
    pub fn add_q4_0_tensor(mut self, name: &str, dims: &[u64], data: &[u8]) -> Self {
        self.tensors.push((
            name.to_string(),
            dims.to_vec(),
            GGUF_TYPE_Q4_0,
            data.to_vec(),
        ));
        self
    }

    /// Serializes the collected metadata and tensors into a GGUF byte image.
    ///
    /// Layout, all integers little-endian:
    /// 1. header: magic, version, tensor count (`u64`), metadata count (`u64`);
    /// 2. one record per metadata entry: key string, value-type tag, value bytes;
    /// 3. one record per tensor: name string, dimension count (`u32`), dimensions
    ///    (`u64` each, in reverse of the order passed to `add_*_tensor`), type tag,
    ///    offset of the payload relative to the start of the tensor-data section;
    /// 4. zero padding up to a multiple of `GGUF_ALIGNMENT`;
    /// 5. tensor payloads in insertion order, each zero-padded to a multiple of
    ///    `GGUF_ALIGNMENT` so that every recorded offset is aligned.
    #[must_use]
    pub fn build(self) -> Vec<u8> {
        let mut data = Vec::new();

        data.extend_from_slice(&GGUF_MAGIC.to_le_bytes());
        data.extend_from_slice(&GGUF_VERSION_V3.to_le_bytes());
        data.extend_from_slice(&(self.tensors.len() as u64).to_le_bytes());
        data.extend_from_slice(&(self.metadata.len() as u64).to_le_bytes());

        for (key, value_type, value_bytes) in &self.metadata {
            Self::write_string(&mut data, key);
            data.extend_from_slice(&value_type.to_le_bytes());
            data.extend_from_slice(value_bytes);
        }

        let mut tensor_data_offset = 0u64;
        for (name, dims, qtype, tensor_bytes) in &self.tensors {
            Self::write_string(&mut data, name);
            data.extend_from_slice(&(dims.len() as u32).to_le_bytes());
            // NOTE(review): dims are emitted last-to-first, presumably because callers
            // pass [rows, cols] while GGUF lists the fastest-varying dimension first —
            // confirm against the reader.
            for dim in dims.iter().rev() {
                data.extend_from_slice(&dim.to_le_bytes());
            }
            data.extend_from_slice(&qtype.to_le_bytes());
            data.extend_from_slice(&tensor_data_offset.to_le_bytes());
            // Advance by the padded size: the payload loop below pads every tensor, and
            // the recorded offsets must describe exactly that layout.
            tensor_data_offset += Self::align_up(tensor_bytes.len()) as u64;
        }

        // The tensor-data section itself starts on an aligned file position.
        Self::pad_to_alignment(&mut data);

        for (_, _, _, tensor_bytes) in &self.tensors {
            data.extend_from_slice(tensor_bytes);
            // `data` was aligned before this payload was appended, so padding the total
            // length pads the payload to `align_up(tensor_bytes.len())` bytes.
            Self::pad_to_alignment(&mut data);
        }

        data
    }

    /// Appends `text` as a GGUF string: little-endian `u64` byte length, then the bytes.
    fn write_string(out: &mut Vec<u8>, text: &str) {
        out.extend_from_slice(&(text.len() as u64).to_le_bytes());
        out.extend_from_slice(text.as_bytes());
    }

    /// Rounds `len` up to the next multiple of `GGUF_ALIGNMENT`.
    fn align_up(len: usize) -> usize {
        len.div_ceil(GGUF_ALIGNMENT) * GGUF_ALIGNMENT
    }

    /// Zero-pads `out` until its length is a multiple of `GGUF_ALIGNMENT`.
    fn pad_to_alignment(out: &mut Vec<u8>) {
        let aligned = Self::align_up(out.len());
        out.resize(aligned, 0);
    }
}
/// Produces placeholder Q4_0 block data covering `num_elements` values.
///
/// One 18-byte block is emitted per 32 elements (rounding up): a little-endian
/// `f16` scale of `0.1` followed by 16 bytes of `0x88`.
fn create_q4_0_data(num_elements: usize) -> Vec<u8> {
    const ELEMENTS_PER_BLOCK: usize = 32;
    const QUANT_BYTES: usize = 16;
    const BLOCK_BYTES: usize = 18;

    let block_count = num_elements.div_ceil(ELEMENTS_PER_BLOCK);
    // Every block carries the same scale, so encode it once.
    let scale_bytes = half::f16::from_f32(0.1).to_le_bytes();

    let mut blocks = Vec::with_capacity(block_count * BLOCK_BYTES);
    (0..block_count).for_each(|_| {
        blocks.extend_from_slice(&scale_bytes);
        blocks.extend_from_slice(&[0x88u8; QUANT_BYTES]);
    });
    blocks
}
/// Builds the in-memory GGUF image of a tiny single-layer `llama` model.
///
/// The model uses a vocabulary of 32, hidden size 32, feed-forward size 64, 4 attention
/// heads, 4 KV heads and a context length of 32. Embedding and norm weights are `f32`;
/// every projection matrix is filled with placeholder Q4_0 blocks.
#[must_use]
pub fn build_pygmy_gguf() -> Vec<u8> {
    const ARCH: &str = "llama";
    const VOCAB_SIZE: usize = 32;
    const HIDDEN_DIM: usize = 32;
    const INTERMEDIATE_DIM: usize = 64;
    const NUM_HEADS: usize = 4;
    const NUM_KV_HEADS: usize = 4;
    const CONTEXT_LENGTH: usize = 32;

    let head_dim = HIDDEN_DIM / NUM_HEADS;
    let kv_dim = NUM_KV_HEADS * head_dim;

    // Dimension values in the width the tensor API expects.
    let vocab = VOCAB_SIZE as u64;
    let hidden = HIDDEN_DIM as u64;
    let intermediate = INTERMEDIATE_DIM as u64;
    let kv = kv_dim as u64;

    // Small repeating ramp of values in [-0.05, 0.049].
    let mut embedding: Vec<f32> = Vec::with_capacity(VOCAB_SIZE * HIDDEN_DIM);
    for idx in 0..VOCAB_SIZE * HIDDEN_DIM {
        embedding.push(((idx % 100) as f32 - 50.0) / 1000.0);
    }
    let unit_norm: Vec<f32> = vec![1.0; HIDDEN_DIM];

    let quantized = |rows: usize, cols: usize| create_q4_0_data(rows * cols);
    let attn_q = quantized(HIDDEN_DIM, HIDDEN_DIM);
    let attn_k = quantized(HIDDEN_DIM, kv_dim);
    let attn_v = quantized(HIDDEN_DIM, kv_dim);
    let attn_output = quantized(HIDDEN_DIM, HIDDEN_DIM);
    let ffn_gate = quantized(HIDDEN_DIM, INTERMEDIATE_DIM);
    let ffn_up = quantized(HIDDEN_DIM, INTERMEDIATE_DIM);
    let ffn_down = quantized(INTERMEDIATE_DIM, HIDDEN_DIM);
    let lm_head = quantized(HIDDEN_DIM, VOCAB_SIZE);

    // Hyper-parameter metadata.
    let builder = GgufBuilder::new()
        .architecture(ARCH)
        .hidden_dim(ARCH, HIDDEN_DIM as u32)
        .num_layers(ARCH, 1)
        .num_heads(ARCH, NUM_HEADS as u32)
        .num_kv_heads(ARCH, NUM_KV_HEADS as u32)
        .context_length(ARCH, CONTEXT_LENGTH as u32)
        .rope_freq_base(ARCH, 10000.0)
        .rms_epsilon(ARCH, 1e-5)
        .ffn_hidden_dim(ARCH, INTERMEDIATE_DIM as u32);

    // Token embedding followed by the attention half of block 0.
    let builder = builder
        .add_f32_tensor("token_embd.weight", &[vocab, hidden], &embedding)
        .add_f32_tensor("blk.0.attn_norm.weight", &[hidden], &unit_norm)
        .add_q4_0_tensor("blk.0.attn_q.weight", &[hidden, hidden], &attn_q)
        .add_q4_0_tensor("blk.0.attn_k.weight", &[hidden, kv], &attn_k)
        .add_q4_0_tensor("blk.0.attn_v.weight", &[hidden, kv], &attn_v)
        .add_q4_0_tensor("blk.0.attn_output.weight", &[hidden, hidden], &attn_output);

    // Feed-forward half of block 0.
    let builder = builder
        .add_f32_tensor("blk.0.ffn_norm.weight", &[hidden], &unit_norm)
        .add_q4_0_tensor("blk.0.ffn_gate.weight", &[hidden, intermediate], &ffn_gate)
        .add_q4_0_tensor("blk.0.ffn_up.weight", &[hidden, intermediate], &ffn_up)
        .add_q4_0_tensor("blk.0.ffn_down.weight", &[intermediate, hidden], &ffn_down);

    // Final norm and output projection.
    builder
        .add_f32_tensor("output_norm.weight", &[hidden], &unit_norm)
        .add_q4_0_tensor("output.weight", &[hidden, vocab], &lm_head)
        .build()
}
/// A temporary directory tree holding one small model file per format
/// (GGUF, APR and SafeTensors), created by `PygmyModelDir::new`.
pub struct PygmyModelDir {
/// Owns the temporary directory; kept only so the directory outlives the paths below
/// (`tempfile::TempDir` removes the directory when dropped).
_temp_dir: TempDir,
/// Path of the temporary directory itself.
pub root: PathBuf,
/// Path of `gguf/model.gguf` under `root`.
pub gguf_path: PathBuf,
/// Path of `apr/model.apr` under `root`.
pub apr_path: PathBuf,
/// Path of `safetensors/model.safetensors` under `root`.
pub st_path: PathBuf,
}
impl PygmyModelDir {
    /// Creates a fresh temporary directory populated with one small model file per format.
    ///
    /// Files written under `root`:
    /// - `gguf/model.gguf`: the output of [`build_pygmy_gguf`];
    /// - `apr/model.apr`: the bytes `APR\0` followed by 60 zero bytes;
    /// - `safetensors/model.safetensors`: a little-endian `u64` header length of 8
    ///   followed by the header `{}` padded to exactly 8 bytes.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if the temporary directory, any subdirectory or
    /// any file cannot be created or written.
    pub fn new() -> std::io::Result<Self> {
        /// Header size announced in the SafeTensors length prefix.
        const ST_HEADER_LEN: usize = 8;
        /// JSON header of the SafeTensors stub (no tensors).
        const ST_HEADER_JSON: &[u8] = b"{}";

        let temp_dir = TempDir::new()?;
        let root = temp_dir.path().to_path_buf();

        let gguf_dir = root.join("gguf");
        let apr_dir = root.join("apr");
        let st_dir = root.join("safetensors");
        std::fs::create_dir_all(&gguf_dir)?;
        std::fs::create_dir_all(&apr_dir)?;
        std::fs::create_dir_all(&st_dir)?;

        let gguf_path = gguf_dir.join("model.gguf");
        let gguf_data = build_pygmy_gguf();
        let mut file = std::fs::File::create(&gguf_path)?;
        file.write_all(&gguf_data)?;

        let apr_path = apr_dir.join("model.apr");
        let mut file = std::fs::File::create(&apr_path)?;
        file.write_all(b"APR\x00")?;
        file.write_all(&[0u8; 60])?;

        let st_path = st_dir.join("model.safetensors");
        let mut file = std::fs::File::create(&st_path)?;
        file.write_all(&(ST_HEADER_LEN as u64).to_le_bytes())?;
        file.write_all(ST_HEADER_JSON)?;
        // The length prefix promises ST_HEADER_LEN header bytes, so the JSON must be padded
        // to exactly that size; writing fewer leaves a file shorter than it declares.
        // NOTE(review): padding stays NUL as before; the safetensors format describes
        // space (0x20) padding — confirm whether any consumer parses this stub's header.
        file.write_all(&[0u8; ST_HEADER_LEN - ST_HEADER_JSON.len()])?;

        Ok(Self {
            _temp_dir: temp_dir,
            root,
            gguf_path,
            apr_path,
            st_path,
        })
    }

    /// Returns `root` as a string slice, or `""` if the path is not valid UTF-8.
    #[must_use]
    pub fn root_str(&self) -> &str {
        self.root.to_str().unwrap_or("")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes the little-endian `u32` starting at byte `at` of `bytes`.
    fn u32_le_at(bytes: &[u8], at: usize) -> u32 {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&bytes[at..at + 4]);
        u32::from_le_bytes(raw)
    }

    /// Decodes the little-endian `u64` starting at byte `at` of `bytes`.
    fn u64_le_at(bytes: &[u8], at: usize) -> u64 {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(&bytes[at..at + 8]);
        u64::from_le_bytes(raw)
    }

    /// The pygmy image starts with the GGUF magic and the v3 version number.
    #[test]
    fn test_build_pygmy_gguf_valid_header() {
        let image = build_pygmy_gguf();
        assert_eq!(u32_le_at(&image, 0), GGUF_MAGIC);
        assert_eq!(u32_le_at(&image, 4), GGUF_VERSION_V3);
    }

    /// The pygmy image stays within the expected size window.
    #[test]
    fn test_build_pygmy_gguf_size() {
        let size = build_pygmy_gguf().len();
        assert!(size < 50_000, "Pygmy should be < 50KB, got {}", size);
        assert!(size > 1_000, "Pygmy should be > 1KB, got {}", size);
    }

    /// All three stub model files are present on disk after construction.
    #[test]
    fn test_pygmy_model_dir_creates_files() {
        let model_dir = PygmyModelDir::new().expect("Should create temp dir");
        assert!(model_dir.gguf_path.exists(), "GGUF file should exist");
        assert!(model_dir.apr_path.exists(), "APR file should exist");
        assert!(model_dir.st_path.exists(), "SafeTensors file should exist");
    }

    /// The GGUF file written to disk starts with the GGUF magic.
    #[test]
    fn test_pygmy_model_dir_gguf_valid() {
        let model_dir = PygmyModelDir::new().expect("Should create temp dir");
        let on_disk = std::fs::read(&model_dir.gguf_path).expect("Should read GGUF");
        assert_eq!(u32_le_at(&on_disk, 0), GGUF_MAGIC);
    }

    /// An empty builder still emits a complete header.
    #[test]
    fn test_gguf_builder_empty() {
        let image = GgufBuilder::new().build();
        assert!(image.len() >= 24);
        assert_eq!(u32_le_at(&image, 0), GGUF_MAGIC);
    }

    /// The metadata count in the header reflects the number of entries added.
    #[test]
    fn test_gguf_builder_with_metadata() {
        let image = GgufBuilder::new()
            .architecture("llama")
            .add_u32("test.value", 42)
            .build();
        // The metadata count is the `u64` at bytes 16..24 of the header.
        let n_metadata = u64_le_at(&image, 16);
        assert_eq!(n_metadata, 2);
    }

    /// `Default` yields a builder whose output starts with the GGUF magic.
    #[test]
    fn test_gguf_builder_default() {
        let image = GgufBuilder::default().build();
        assert_eq!(u32_le_at(&image, 0), GGUF_MAGIC);
    }

    /// `root_str` is non-empty for a freshly created directory.
    #[test]
    fn test_pygmy_model_dir_root_str() {
        let model_dir = PygmyModelDir::new().expect("Should create temp dir");
        assert!(!model_dir.root_str().is_empty());
    }
}