use crate::gguf::{
GGUFConfig, OwnedQKVWeights, OwnedQuantizedLayer, OwnedQuantizedModel, OwnedQuantizedTensor,
};
/// Builds a deterministic single-layer quantized model for tests, with all
/// tensor dimensions derived from `config` and all weights filled with
/// reproducible Q4_K test data.
pub(crate) fn create_test_model_with_config(config: &GGUFConfig) -> OwnedQuantizedModel {
    let n_vocab = config.vocab_size;
    let d_model = config.hidden_dim;
    let d_ff = config.intermediate_dim;
    let head_dim = d_model / config.num_heads;
    // GQA: key/value projections may use fewer heads than the queries.
    let kv_dim = config.num_kv_heads * head_dim;
    let caps = &config.constraints;

    // Fused QKV projection: query rows followed by key and value rows.
    let fused_qkv = create_q4k_test_data(d_model, d_model + 2 * kv_dim);

    let layer = OwnedQuantizedLayer {
        attn_norm_weight: vec![1.0f32; d_model],
        // LayerNorm-style architectures carry a norm bias; RMSNorm ones do not.
        attn_norm_bias: (!caps.uses_rmsnorm()).then(|| vec![0.0f32; d_model]),
        qkv_weight: OwnedQKVWeights::Fused(fused_qkv),
        qkv_bias: None,
        attn_output_weight: create_q4k_test_data(d_model, d_model),
        attn_output_bias: None,
        ffn_up_weight: create_q4k_test_data(d_model, d_ff),
        ffn_up_bias: None,
        ffn_down_weight: create_q4k_test_data(d_ff, d_model),
        ffn_down_bias: None,
        // Gate projection only for gated-FFN architectures.
        ffn_gate_weight: caps
            .has_gate_ffn()
            .then(|| create_q4k_test_data(d_model, d_ff)),
        ffn_gate_bias: None,
        ffn_norm_weight: None,
        ffn_norm_bias: None,
        attn_q_norm_weight: None,
        attn_k_norm_weight: None,
    };

    OwnedQuantizedModel {
        config: config.clone(),
        token_embedding: vec![0.1f32; n_vocab * d_model],
        // Learned absolute position table only when the architecture uses one.
        position_embedding: caps
            .uses_absolute_positions()
            .then(|| vec![0.01f32; config.context_length * d_model]),
        layers: vec![layer],
        encoder_layers: vec![],
        encoder_output_norm_weight: None,
        encoder_output_norm_bias: None,
        output_norm_weight: vec![1.0f32; d_model],
        output_norm_bias: (!caps.uses_rmsnorm()).then(|| vec![0.0f32; d_model]),
        lm_head_weight: create_q4k_test_data(d_model, n_vocab),
        lm_head_bias: None,
        #[cfg(feature = "cuda")]
        cuda_executor: None,
        #[cfg(feature = "cuda")]
        cuda_kernel_count: std::sync::atomic::AtomicU64::new(0),
        #[cfg(feature = "cuda")]
        cached_weight_names: std::sync::Mutex::new(std::collections::HashSet::new()),
    }
}
/// Generates a deterministic Q4_K-format test tensor of shape
/// `out_dim × in_dim` (144 bytes per 256-element superblock, rounded up).
pub(crate) fn create_q4k_test_data(in_dim: usize, out_dim: usize) -> OwnedQuantizedTensor {
    // One Q4_K superblock covers 256 weights and occupies 144 bytes.
    const SB_BYTES: usize = 144;
    let sb_per_row = in_dim.div_ceil(256);

    let mut data = Vec::with_capacity(out_dim * sb_per_row * SB_BYTES);
    for row in 0..out_dim {
        for sb in 0..sb_per_row {
            // d = 1.0 in f16 (0x3C00), dmin = 0.0 — both little-endian.
            data.extend_from_slice(&0x3C00_u16.to_le_bytes());
            data.extend_from_slice(&0x0000_u16.to_le_bytes());
            // Remaining 140 bytes (scales + quants): a repeatable pattern
            // that varies with the row and superblock position.
            data.extend((4..SB_BYTES).map(|i| ((row + sb + i) % 16) as u8));
        }
    }

    OwnedQuantizedTensor {
        data,
        in_dim,
        out_dim,
        qtype: 12,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal single-layer `GGUFConfig` for tests. Only the fields
    /// that differ between cases are parameters; everything else is fixed to
    /// sensible defaults. This replaces four near-identical struct literals.
    fn test_config(
        architecture: &str,
        hidden_dim: usize,
        intermediate_dim: usize,
        num_heads: usize,
        num_kv_heads: usize,
        vocab_size: usize,
        context_length: usize,
    ) -> GGUFConfig {
        GGUFConfig {
            architecture: architecture.to_string(),
            constraints: crate::gguf::ArchConstraints::from_architecture(architecture),
            hidden_dim,
            intermediate_dim,
            num_heads,
            num_kv_heads,
            num_layers: 1,
            vocab_size,
            rope_theta: 10000.0,
            context_length,
            eps: 1e-5,
            rope_type: 0,
            explicit_head_dim: None,
            bos_token_id: None,
            eos_token_id: None,
        }
    }

    #[test]
    fn test_create_q4k_test_data_basic() {
        let tensor = create_q4k_test_data(256, 64);
        assert_eq!(tensor.in_dim, 256);
        assert_eq!(tensor.out_dim, 64);
        assert_eq!(tensor.qtype, 12);
        // 256 inputs = exactly one 144-byte superblock per row.
        assert_eq!(tensor.data.len(), 64 * 144);
    }

    #[test]
    fn test_create_q4k_test_data_multi_superblock() {
        // 512 inputs = two superblocks per row.
        let tensor = create_q4k_test_data(512, 32);
        assert_eq!(tensor.in_dim, 512);
        assert_eq!(tensor.out_dim, 32);
        assert_eq!(tensor.data.len(), 32 * 2 * 144);
    }

    #[test]
    fn test_create_test_model_with_config_basic() {
        let config = test_config("test", 64, 128, 4, 4, 100, 512);
        let model = create_test_model_with_config(&config);
        assert_eq!(model.config.hidden_dim, 64);
        assert_eq!(model.layers.len(), 1);
        assert_eq!(model.token_embedding.len(), 100 * 64);
    }

    #[test]
    fn test_create_q4k_test_data_small_input() {
        // Inputs below 256 still round up to a full superblock.
        let tensor = create_q4k_test_data(64, 16);
        assert_eq!(tensor.in_dim, 64);
        assert_eq!(tensor.out_dim, 16);
        assert_eq!(tensor.data.len(), 16 * 144);
    }

    #[test]
    fn test_create_q4k_test_data_large_dimensions() {
        let tensor = create_q4k_test_data(1024, 128);
        assert_eq!(tensor.in_dim, 1024);
        assert_eq!(tensor.out_dim, 128);
        assert_eq!(tensor.data.len(), 128 * 4 * 144);
    }

    #[test]
    fn test_create_q4k_test_data_d_value() {
        // d scale is f16 1.0 (0x3C00), stored little-endian.
        let tensor = create_q4k_test_data(256, 1);
        assert_eq!(tensor.data[0], 0x00);
        assert_eq!(tensor.data[1], 0x3C);
    }

    #[test]
    fn test_create_q4k_test_data_dmin_value() {
        // dmin is f16 0.0.
        let tensor = create_q4k_test_data(256, 1);
        assert_eq!(tensor.data[2], 0x00);
        assert_eq!(tensor.data[3], 0x00);
    }

    #[test]
    fn test_create_test_model_with_config_gqa() {
        // Grouped-query attention: fewer KV heads than query heads.
        let config = test_config("gqa_test", 128, 256, 8, 2, 100, 512);
        let model = create_test_model_with_config(&config);
        assert_eq!(model.config.num_heads, 8);
        assert_eq!(model.config.num_kv_heads, 2);
    }

    #[test]
    fn test_create_test_model_output_norm() {
        let config = test_config("test", 32, 64, 2, 2, 50, 256);
        let model = create_test_model_with_config(&config);
        assert_eq!(model.output_norm_weight.len(), 32);
        // Norm weights default to identity (all ones).
        assert!(model
            .output_norm_weight
            .iter()
            .all(|&w| (w - 1.0).abs() < f32::EPSILON));
    }

    #[test]
    fn test_create_test_model_lm_head() {
        let config = test_config("test", 64, 128, 4, 4, 100, 512);
        let model = create_test_model_with_config(&config);
        assert_eq!(model.lm_head_weight.in_dim, 64);
        assert_eq!(model.lm_head_weight.out_dim, 100);
    }

    #[test]
    fn test_create_test_model_layer_attn_norm() {
        let config = test_config("test", 48, 96, 3, 3, 50, 256);
        let model = create_test_model_with_config(&config);
        assert_eq!(model.layers[0].attn_norm_weight.len(), 48);
        assert!(model.layers[0]
            .attn_norm_weight
            .iter()
            .all(|&w| (w - 1.0).abs() < f32::EPSILON));
    }

    #[test]
    fn test_create_q4k_test_data_boundary() {
        // Exactly one superblock at the 256-input boundary.
        let tensor = create_q4k_test_data(256, 1);
        assert_eq!(tensor.in_dim, 256);
        assert_eq!(tensor.data.len(), 144);
    }

    #[test]
    fn test_create_q4k_test_data_just_over_boundary() {
        // One input past the boundary forces a second superblock.
        let tensor = create_q4k_test_data(257, 1);
        assert_eq!(tensor.in_dim, 257);
        assert_eq!(tensor.data.len(), 2 * 144);
    }
}