use crate::format::v2::{AprV2Metadata, AprV2Writer};
/// Build a pygmy safetensors fixture using the default [`PygmyConfig`].
#[must_use]
pub fn build_pygmy_safetensors() -> Vec<u8> {
    let config = PygmyConfig::default();
    build_pygmy_safetensors_with_config(config)
}
/// Configuration knobs for generating a miniature ("pygmy") transformer
/// checkpoint used as a test fixture.
#[derive(Debug, Clone)]
// The bools are independent include/exclude switches for tensor families,
// not an encoded state machine, so the clippy lint is suppressed on purpose.
#[allow(clippy::struct_excessive_bools)] pub struct PygmyConfig {
/// Number of rows in the token-embedding table.
pub vocab_size: usize,
/// Model (hidden) dimension.
pub hidden_size: usize,
/// Number of transformer layers to emit tensors for.
pub num_layers: usize,
/// Attention head count; `None` derives it from `hidden_size` (see `effective_num_heads`).
pub num_heads: Option<usize>,
/// KV head count for GQA; `None` falls back to `effective_num_heads` (MHA).
pub num_kv_heads: Option<usize>,
/// Emit `model.embed_tokens.weight` (and `lm_head.weight` unless tied).
pub include_embedding: bool,
/// Emit the per-layer layernorm weights and the final `model.norm.weight`.
pub include_norms: bool,
/// Emit q/k/v/o projection weights (and biases if `include_bias`).
pub include_attention: bool,
/// Emit gate/up/down MLP projection weights.
pub include_mlp: bool,
/// Emit q/k/v projection biases (Qwen2-style).
pub include_bias: bool,
/// When true, skip the separate `lm_head.weight` (tied to the embedding).
pub tied_embeddings: bool,
}
impl Default for PygmyConfig {
    /// Baseline fixture: a tiny one-layer dense model with every tensor
    /// family enabled, no biases, and an untied LM head.
    fn default() -> Self {
        Self {
            // Tiny dimensions keep the fixture bytes small.
            vocab_size: 8,
            hidden_size: 4,
            num_layers: 1,
            // Head counts are derived via the `effective_*` accessors.
            num_heads: None,
            num_kv_heads: None,
            // All tensor families on, bias/tying off.
            include_embedding: true,
            include_norms: true,
            include_attention: true,
            include_mlp: true,
            include_bias: false,
            tied_embeddings: false,
        }
    }
}
impl PygmyConfig {
    /// Smallest useful model: a 4x2 embedding table and one layer with
    /// norms, attention, and MLP all disabled.
    #[must_use]
    pub fn minimal() -> Self {
        Self {
            vocab_size: 4,
            hidden_size: 2,
            num_layers: 1,
            include_embedding: true,
            include_norms: false,
            include_attention: false,
            include_mlp: false,
            ..Default::default()
        }
    }
    /// Only the token-embedding table; `num_layers == 0` suppresses every
    /// per-layer tensor and the final norm.
    #[must_use]
    pub fn embedding_only() -> Self {
        Self {
            vocab_size: 8,
            hidden_size: 4,
            num_layers: 0,
            include_embedding: true,
            include_norms: false,
            include_attention: false,
            include_mlp: false,
            ..Default::default()
        }
    }
    /// One full LLaMA-style layer (norms + attention + MLP), no biases,
    /// untied embeddings.
    #[must_use]
    pub fn llama_style() -> Self {
        Self {
            vocab_size: 16,
            hidden_size: 8,
            num_layers: 1,
            include_embedding: true,
            include_norms: true,
            include_attention: true,
            include_mlp: true,
            ..Default::default()
        }
    }
    /// Qwen2-style model with grouped-query attention (4 query heads over
    /// 2 KV heads) and q/k/v biases.
    #[must_use]
    pub fn qwen2_gqa() -> Self {
        Self {
            vocab_size: 16,
            hidden_size: 8,
            num_layers: 2,
            num_heads: Some(4),
            num_kv_heads: Some(2),
            include_embedding: true,
            include_norms: true,
            include_attention: true,
            include_mlp: true,
            include_bias: true,
            tied_embeddings: false,
        }
    }
    /// Same as [`Self::qwen2_gqa`] but with the LM head tied to the
    /// embedding table (no separate `lm_head.weight` tensor).
    #[must_use]
    pub fn qwen2_gqa_tied() -> Self {
        Self {
            tied_embeddings: true,
            ..Self::qwen2_gqa()
        }
    }
    /// Larger fixture for tests that need non-trivial tensor sizes while
    /// still building quickly.
    #[must_use]
    pub fn realistic() -> Self {
        Self {
            vocab_size: 256,
            hidden_size: 128,
            num_layers: 2,
            num_heads: Some(2),
            num_kv_heads: Some(1),
            include_embedding: true,
            include_norms: true,
            include_attention: true,
            include_mlp: true,
            include_bias: false,
            tied_embeddings: false,
        }
    }
    /// Attention head count; when unset, defaults to one head per 64
    /// hidden dims with a floor of 1.
    #[must_use]
    pub fn effective_num_heads(&self) -> usize {
        self.num_heads
            .unwrap_or_else(|| (self.hidden_size / 64).max(1))
    }
    /// KV head count; when unset, falls back to MHA (same as query heads).
    #[must_use]
    pub fn effective_num_kv_heads(&self) -> usize {
        self.num_kv_heads
            .unwrap_or_else(|| self.effective_num_heads())
    }
    /// Per-head dimension. `effective_num_heads` always returns >= 1, so
    /// the zero guard is purely defensive.
    #[must_use]
    pub fn head_dim(&self) -> usize {
        let nh = self.effective_num_heads();
        if nh > 0 {
            self.hidden_size / nh
        } else {
            self.hidden_size
        }
    }
    /// Total KV projection width: `kv_heads * head_dim`.
    #[must_use]
    pub fn kv_dim(&self) -> usize {
        self.effective_num_kv_heads() * self.head_dim()
    }
    /// Render a HuggingFace-style `config.json` describing this model.
    ///
    /// `intermediate_size` must agree with the MLP tensors emitted by
    /// `build_pygmy_safetensors_with_config`, which sizes gate/up/down
    /// projections at `2 * hidden_size` (the previous `4 * hidden_size`
    /// here contradicted the actual tensor shapes).
    #[must_use]
    pub fn to_config_json(&self) -> String {
        let num_attention_heads = self.effective_num_heads();
        let num_key_value_heads = self.effective_num_kv_heads();
        let intermediate_size = self.hidden_size * 2;
        format!(
            r#"{{
  "architectures": ["Qwen2ForCausalLM"],
  "hidden_size": {},
  "num_hidden_layers": {},
  "num_attention_heads": {},
  "num_key_value_heads": {},
  "vocab_size": {},
  "intermediate_size": {},
  "max_position_embeddings": 2048,
  "rms_norm_eps": 1e-06,
  "rope_theta": 10000.0,
  "model_type": "qwen2"
}}"#,
            self.hidden_size,
            self.num_layers,
            num_attention_heads,
            num_key_value_heads,
            self.vocab_size,
            intermediate_size
        )
    }
}
/// Deterministic pseudo-data for embedding-style tensors: values sweep
/// from -0.05 upward in 0.001 steps, repeating every 100 elements.
fn gen_st_embed_data(count: usize) -> Vec<f32> {
    let mut out = Vec::with_capacity(count);
    for i in 0..count {
        let centered = (i % 100) as f32 - 50.0;
        out.push(centered / 1000.0);
    }
    out
}
/// Deterministic pseudo-data for projection weights: values sweep from
/// -0.05 upward in 0.0005 steps, repeating every 200 elements.
fn gen_st_weight_data(count: usize) -> Vec<f32> {
    let mut out = Vec::with_capacity(count);
    for i in 0..count {
        let centered = (i % 200) as f32 - 100.0;
        out.push(centered / 2000.0);
    }
    out
}
/// Build an in-memory safetensors checkpoint described by `config`.
///
/// Tensors are appended in HF layout order: token embedding, then per
/// layer the norms, attention projections (q, o, k, v, plus optional
/// biases), and MLP projections, followed by the final norm and — unless
/// tied — the `lm_head.weight`.
#[must_use]
pub fn build_pygmy_safetensors_with_config(config: PygmyConfig) -> Vec<u8> {
    let hidden = config.hidden_size;
    let vocab = config.vocab_size;
    let mut tensors: Vec<(String, Vec<usize>, Vec<f32>)> = Vec::new();

    // Token-embedding table.
    if config.include_embedding {
        let embed = gen_st_embed_data(vocab * hidden);
        tensors.push(("model.embed_tokens.weight".to_string(), vec![vocab, hidden], embed));
    }

    for layer_idx in 0..config.num_layers {
        // RMSNorm weights are all-ones (identity scaling).
        if config.include_norms {
            let ones = vec![1.0; hidden];
            tensors.push((
                format!("model.layers.{layer_idx}.input_layernorm.weight"),
                vec![hidden],
                ones.clone(),
            ));
            tensors.push((
                format!("model.layers.{layer_idx}.post_attention_layernorm.weight"),
                vec![hidden],
                ones,
            ));
        }

        if config.include_attention {
            let kv_width = config.kv_dim();
            // q/o share one data pattern; k/v share a (possibly narrower,
            // for GQA) second pattern.
            let qo = gen_st_weight_data(hidden * hidden);
            tensors.push((
                format!("model.layers.{layer_idx}.self_attn.q_proj.weight"),
                vec![hidden, hidden],
                qo.clone(),
            ));
            tensors.push((
                format!("model.layers.{layer_idx}.self_attn.o_proj.weight"),
                vec![hidden, hidden],
                qo,
            ));
            let kv = gen_st_weight_data(kv_width * hidden);
            tensors.push((
                format!("model.layers.{layer_idx}.self_attn.k_proj.weight"),
                vec![kv_width, hidden],
                kv.clone(),
            ));
            tensors.push((
                format!("model.layers.{layer_idx}.self_attn.v_proj.weight"),
                vec![kv_width, hidden],
                kv,
            ));
            if config.include_bias {
                // Biases ramp linearly: 0.000, 0.001, 0.002, ...
                let ramp = |n: usize| -> Vec<f32> {
                    (0..n).map(|i| (i as f32) / 1000.0).collect()
                };
                tensors.push((
                    format!("model.layers.{layer_idx}.self_attn.q_proj.bias"),
                    vec![hidden],
                    ramp(hidden),
                ));
                let kv_bias = ramp(kv_width);
                tensors.push((
                    format!("model.layers.{layer_idx}.self_attn.k_proj.bias"),
                    vec![kv_width],
                    kv_bias.clone(),
                ));
                tensors.push((
                    format!("model.layers.{layer_idx}.self_attn.v_proj.bias"),
                    vec![kv_width],
                    kv_bias,
                ));
            }
        }

        if config.include_mlp {
            // The fixture MLP uses an intermediate size of 2 * hidden.
            let inter = hidden * 2;
            let gate_up = gen_st_weight_data(inter * hidden);
            let down = gen_st_weight_data(hidden * inter);
            tensors.push((
                format!("model.layers.{layer_idx}.mlp.gate_proj.weight"),
                vec![inter, hidden],
                gate_up.clone(),
            ));
            tensors.push((
                format!("model.layers.{layer_idx}.mlp.up_proj.weight"),
                vec![inter, hidden],
                gate_up,
            ));
            tensors.push((
                format!("model.layers.{layer_idx}.mlp.down_proj.weight"),
                vec![hidden, inter],
                down,
            ));
        }
    }

    // The final norm only exists when at least one layer was emitted.
    if config.include_norms && config.num_layers > 0 {
        tensors.push(("model.norm.weight".to_string(), vec![hidden], vec![1.0; hidden]));
    }

    // An untied LM head reuses the embedding data pattern.
    if config.include_embedding && !config.tied_embeddings {
        tensors.push((
            "lm_head.weight".to_string(),
            vec![vocab, hidden],
            gen_st_embed_data(vocab * hidden),
        ));
    }

    build_safetensors_bytes(&tensors)
}
/// Serialize `tensors` into the safetensors container format:
/// an 8-byte little-endian header length, a JSON header mapping each
/// tensor name to `{dtype, shape, data_offsets}`, then the raw
/// little-endian F32 payload in the same order.
fn build_safetensors_bytes(tensors: &[(String, Vec<usize>, Vec<f32>)]) -> Vec<u8> {
    use std::collections::BTreeMap;
    // Pre-size the payload buffer: 4 bytes per f32 element.
    let payload_len: usize = tensors.iter().map(|(_, _, data)| data.len() * 4).sum();
    let mut all_data = Vec::with_capacity(payload_len);
    let mut current_offset = 0usize;
    let mut tensor_info: BTreeMap<String, serde_json::Value> = BTreeMap::new();
    for (name, shape, data) in tensors {
        let byte_size = data.len() * 4;
        // Offsets are relative to the start of the data section.
        tensor_info.insert(
            name.clone(),
            serde_json::json!({
                "dtype": "F32",
                "shape": shape,
                "data_offsets": [current_offset, current_offset + byte_size]
            }),
        );
        for &val in data {
            all_data.extend_from_slice(&val.to_le_bytes());
        }
        current_offset += byte_size;
    }
    // "__metadata__" is the spec's reserved key for free-form string metadata.
    tensor_info.insert(
        "__metadata__".to_string(),
        serde_json::json!({"format": "pt", "pygmy": "true"}),
    );
    // Serializing a map of JSON values cannot fail; the previous
    // `unwrap_or_default()` would have silently emitted an empty header
    // (a corrupt file) if it ever did, so fail loudly instead.
    let header_json = serde_json::to_string(&tensor_info)
        .expect("safetensors header serialization cannot fail");
    let header_bytes = header_json.as_bytes();
    let header_len = header_bytes.len() as u64;
    // Final layout: [header_len: u64 LE][header JSON][tensor payload].
    let mut result = Vec::with_capacity(8 + header_bytes.len() + all_data.len());
    result.extend_from_slice(&header_len.to_le_bytes());
    result.extend_from_slice(header_bytes);
    result.extend_from_slice(&all_data);
    result
}
include!("quant_variant.rs");
include!("gguf_pygmy_config.rs");