#[cfg(test)]
#[cfg(feature = "cuda")]
mod tests {
    use super::*;

    /// Fully-populated Qwen2.5-style config maps field-for-field onto the
    /// extracted CUDA model config.
    #[test]
    fn test_config_extraction() {
        let raw = SafetensorsConfig {
            hidden_size: Some(1536),
            num_hidden_layers: Some(28),
            num_attention_heads: Some(12),
            num_key_value_heads: Some(2),
            vocab_size: Some(151936),
            intermediate_size: Some(8960),
            max_position_embeddings: Some(32768),
            rope_theta: Some(1000000.0),
            rms_norm_eps: Some(1e-6),
            architectures: Some(vec!["Qwen2ForCausalLM".to_string()]),
            model_type: Some("qwen2".to_string()),
            bos_token_id: Some(151643),
            eos_token_id: Some(151645),
            tie_word_embeddings: Some(true),
            ..Default::default()
        };

        let extracted = SafeTensorsCudaModel::extract_config(&raw).expect("config");

        assert_eq!(extracted.hidden_dim, 1536);
        assert_eq!(extracted.num_layers, 28);
        assert_eq!(extracted.num_heads, 12);
        assert_eq!(extracted.num_kv_heads, 2);
        assert_eq!(extracted.vocab_size, 151936);
        assert_eq!(extracted.intermediate_dim, 8960);
        assert_eq!(extracted.context_length, 32768);
        // Float fields compared with a tolerance rather than exact equality.
        assert!((extracted.rope_theta - 1_000_000.0).abs() < 1.0);
        assert!((extracted.eps - 1e-6).abs() < 1e-9);
    }

    /// Optional fields left as `None` fall back to sensible defaults
    /// (4x MLP expansion, 2048 context, theta 10000, eps 1e-6).
    #[test]
    fn test_config_extraction_defaults() {
        let raw = SafetensorsConfig {
            hidden_size: Some(768),
            num_hidden_layers: Some(12),
            num_attention_heads: Some(12),
            num_key_value_heads: None,
            vocab_size: Some(50257),
            intermediate_size: None,
            max_position_embeddings: None,
            rope_theta: None,
            rms_norm_eps: None,
            architectures: None,
            model_type: None,
            bos_token_id: None,
            eos_token_id: None,
            tie_word_embeddings: None,
            ..Default::default()
        };

        let extracted = SafeTensorsCudaModel::extract_config(&raw).expect("config");

        assert_eq!(extracted.hidden_dim, 768);
        assert_eq!(extracted.intermediate_dim, 768 * 4);
        assert_eq!(extracted.context_length, 2048);
        assert!((extracted.rope_theta - 10000.0).abs() < 0.1);
        assert!((extracted.eps - 1e-6).abs() < 1e-9);
    }

    /// `hidden_size` is mandatory; extraction must fail without it.
    #[test]
    fn test_config_extraction_missing_hidden_size() {
        let raw = SafetensorsConfig {
            hidden_size: None,
            num_hidden_layers: Some(12),
            num_attention_heads: Some(12),
            num_key_value_heads: None,
            vocab_size: Some(50257),
            intermediate_size: None,
            max_position_embeddings: None,
            rope_theta: None,
            rms_norm_eps: None,
            architectures: None,
            model_type: None,
            bos_token_id: None,
            eos_token_id: None,
            tie_word_embeddings: None,
            ..Default::default()
        };

        assert!(SafeTensorsCudaModel::extract_config(&raw).is_err());
    }

    /// `num_hidden_layers` is mandatory; extraction must fail without it.
    #[test]
    fn test_config_extraction_missing_layers() {
        let raw = SafetensorsConfig {
            hidden_size: Some(768),
            num_hidden_layers: None,
            num_attention_heads: Some(12),
            num_key_value_heads: None,
            vocab_size: Some(50257),
            intermediate_size: None,
            max_position_embeddings: None,
            rope_theta: None,
            rms_norm_eps: None,
            architectures: None,
            model_type: None,
            bos_token_id: None,
            eos_token_id: None,
            tie_word_embeddings: None,
            ..Default::default()
        };

        assert!(SafeTensorsCudaModel::extract_config(&raw).is_err());
    }

    /// `num_attention_heads` is mandatory; extraction must fail without it.
    #[test]
    fn test_config_extraction_missing_attention_heads() {
        let raw = SafetensorsConfig {
            hidden_size: Some(768),
            num_hidden_layers: Some(12),
            num_attention_heads: None,
            num_key_value_heads: None,
            vocab_size: Some(50257),
            intermediate_size: None,
            max_position_embeddings: None,
            rope_theta: None,
            rms_norm_eps: None,
            architectures: None,
            model_type: None,
            bos_token_id: None,
            eos_token_id: None,
            tie_word_embeddings: None,
            ..Default::default()
        };

        assert!(SafeTensorsCudaModel::extract_config(&raw).is_err());
    }

    /// `vocab_size` is mandatory; extraction must fail without it.
    #[test]
    fn test_config_extraction_missing_vocab_size() {
        let raw = SafetensorsConfig {
            hidden_size: Some(768),
            num_hidden_layers: Some(12),
            num_attention_heads: Some(12),
            num_key_value_heads: None,
            vocab_size: None,
            intermediate_size: None,
            max_position_embeddings: None,
            rope_theta: None,
            rms_norm_eps: None,
            architectures: None,
            model_type: None,
            bos_token_id: None,
            eos_token_id: None,
            tie_word_embeddings: None,
            ..Default::default()
        };

        assert!(SafeTensorsCudaModel::extract_config(&raw).is_err());
    }

    /// 2x2 transpose swaps the two off-diagonal entries.
    #[test]
    fn test_transpose_for_gemm_identity() {
        let w = vec![1.0, 2.0, 3.0, 4.0];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 2, 2);
        assert_eq!(t, vec![1.0, 3.0, 2.0, 4.0]);
    }

    /// A 2x3 row-major matrix transposes to 3x2 column order.
    #[test]
    fn test_transpose_for_gemm_rectangular() {
        let w = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 2, 3);
        assert_eq!(t, vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
    }

    /// Transposing a 1xN row vector leaves the data in the same order.
    #[test]
    fn test_transpose_for_gemm_single_row() {
        let w = vec![1.0, 2.0, 3.0, 4.0];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 1, 4);
        assert_eq!(t, vec![1.0, 2.0, 3.0, 4.0]);
    }

    /// Transposing an Nx1 column vector likewise preserves element order.
    #[test]
    fn test_transpose_for_gemm_single_col() {
        let w = vec![1.0, 2.0, 3.0, 4.0];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 4, 1);
        assert_eq!(t, vec![1.0, 2.0, 3.0, 4.0]);
    }

    /// Spot-check a 4x4 transpose: diagonal fixed, (0,1)/(1,0) swapped.
    #[test]
    fn test_transpose_for_gemm_4x4() {
        let w = vec![
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 4, 4);
        // Diagonal is invariant under transposition.
        assert_eq!(t[0], 1.0);
        assert_eq!(t[5], 6.0);
        assert_eq!(t[10], 11.0);
        assert_eq!(t[15], 16.0);
        // Off-diagonal neighbors swap.
        assert_eq!(t[1], 5.0);
        assert_eq!(t[4], 2.0);
    }

    /// Concatenated QKV buffer has hidden_dim * (hidden + 2*kv) elements.
    #[test]
    fn test_concat_qkv_transposed_simple() {
        let q = vec![
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ];
        let k = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let v = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let qkv = SafeTensorsCudaModel::concat_qkv_transposed(&q, &k, &v, 4, 2);
        assert_eq!(qkv.len(), 4 * 8);
    }

    /// Output length scales correctly for a GQA-shaped (kv_dim < hidden_dim) case.
    #[test]
    fn test_concat_qkv_transposed_dimensions() {
        let hidden_dim = 64;
        let kv_dim = 16;
        let q = vec![0.1f32; hidden_dim * hidden_dim];
        let k = vec![0.2f32; kv_dim * hidden_dim];
        let v = vec![0.3f32; kv_dim * hidden_dim];

        let qkv = SafeTensorsCudaModel::concat_qkv_transposed(&q, &k, &v, hidden_dim, kv_dim);

        let expected_len = hidden_dim * (hidden_dim + 2 * kv_dim);
        assert_eq!(qkv.len(), expected_len);
    }

    /// The Debug impl renders the architecture name and key dimensions.
    #[test]
    fn test_safetensors_cuda_config_debug() {
        let cfg = SafeTensorsCudaConfig {
            architecture: "Qwen2".to_string(),
            hidden_dim: 768,
            num_layers: 12,
            num_heads: 12,
            num_kv_heads: 4,
            vocab_size: 50257,
            intermediate_dim: 3072,
            context_length: 2048,
            rope_theta: 10000.0,
            eps: 1e-6,
            tie_word_embeddings: true,
            has_qk_norm: false,
            has_bias: true,
            eos_token_id: None,
        };

        let rendered = format!("{:?}", cfg);
        assert!(rendered.contains("Qwen2"));
        assert!(rendered.contains("768"));
        assert!(rendered.contains("12"));
    }

    /// Clone produces a field-for-field copy.
    #[test]
    fn test_safetensors_cuda_config_clone() {
        let cfg = SafeTensorsCudaConfig {
            architecture: "LLaMA".to_string(),
            hidden_dim: 4096,
            num_layers: 32,
            num_heads: 32,
            num_kv_heads: 8,
            vocab_size: 32000,
            intermediate_dim: 11008,
            context_length: 4096,
            rope_theta: 10000.0,
            eps: 1e-5,
            tie_word_embeddings: false,
            has_qk_norm: false,
            has_bias: false,
            eos_token_id: None,
        };

        let copy = cfg.clone();

        assert_eq!(copy.architecture, cfg.architecture);
        assert_eq!(copy.hidden_dim, cfg.hidden_dim);
        assert_eq!(copy.num_layers, cfg.num_layers);
        assert_eq!(copy.num_heads, cfg.num_heads);
        assert_eq!(copy.num_kv_heads, cfg.num_kv_heads);
        assert_eq!(copy.vocab_size, cfg.vocab_size);
        assert_eq!(copy.intermediate_dim, cfg.intermediate_dim);
        assert_eq!(copy.context_length, cfg.context_length);
        assert!((copy.rope_theta - cfg.rope_theta).abs() < 0.001);
        assert!((copy.eps - cfg.eps).abs() < 1e-10);
    }

    /// Transposition is a permutation: the element sum is preserved.
    #[test]
    fn test_transpose_preserves_values() {
        let w: Vec<f32> = (1..=12).map(|x| x as f32).collect();
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 3, 4);

        let before: f32 = w.iter().sum();
        let after: f32 = t.iter().sum();
        assert!((before - after).abs() < 1e-6);
    }

    /// Transposing twice (with swapped dimensions) round-trips to the original.
    #[test]
    fn test_transpose_double_transpose_is_identity() {
        let w: Vec<f32> = (1..=20).map(|x| x as f32).collect();
        let n = 4;
        let k = 5;

        let once = SafeTensorsCudaModel::transpose_for_gemm(&w, n, k);
        let twice = SafeTensorsCudaModel::transpose_for_gemm(&once, k, n);

        for (orig, back) in w.iter().zip(twice.iter()) {
            assert!((orig - back).abs() < 1e-6);
        }
    }

    /// VRAM estimate for Qwen2.5-Coder-1.5B in F32 lands in the expected range.
    #[test]
    fn test_estimate_vram_bytes_qwen2_1_5b() {
        let cfg = SafeTensorsCudaConfig {
            architecture: "Qwen2".to_string(),
            hidden_dim: 1536,
            num_layers: 28,
            num_heads: 12,
            num_kv_heads: 2,
            vocab_size: 151936,
            intermediate_dim: 8960,
            context_length: 32768,
            rope_theta: 1000000.0,
            eps: 1e-6,
            tie_word_embeddings: true,
            has_qk_norm: false,
            has_bias: true,
            eos_token_id: None,
        };

        let vram = SafeTensorsCudaModel::estimate_vram_bytes(&cfg, 2048);
        let vram_mb = vram / (1024 * 1024);
        assert!(
            vram_mb > 5500 && vram_mb < 7000,
            "Expected 5.5-7 GB for Qwen2.5-Coder-1.5B F32, got {} MB",
            vram_mb
        );
    }

    /// Doubling the layer count must strictly increase the VRAM estimate.
    #[test]
    fn test_estimate_vram_bytes_scales_with_layers() {
        let shallow = SafeTensorsCudaConfig {
            architecture: "Test".to_string(),
            hidden_dim: 768,
            num_layers: 12,
            num_heads: 12,
            num_kv_heads: 12,
            vocab_size: 50257,
            intermediate_dim: 3072,
            context_length: 2048,
            rope_theta: 10000.0,
            eps: 1e-6,
            tie_word_embeddings: false,
            has_qk_norm: false,
            has_bias: false,
            eos_token_id: None,
        };
        let deep = SafeTensorsCudaConfig {
            num_layers: 24,
            ..shallow.clone()
        };

        let vram_12 = SafeTensorsCudaModel::estimate_vram_bytes(&shallow, 1024);
        let vram_24 = SafeTensorsCudaModel::estimate_vram_bytes(&deep, 1024);
        assert!(
            vram_24 > vram_12,
            "24 layers ({}) should use more VRAM than 12 layers ({})",
            vram_24,
            vram_12
        );
    }

    /// A longer sequence must strictly increase the VRAM estimate (KV cache).
    #[test]
    fn test_estimate_vram_bytes_scales_with_seq_len() {
        let cfg = SafeTensorsCudaConfig {
            architecture: "Test".to_string(),
            hidden_dim: 768,
            num_layers: 12,
            num_heads: 12,
            num_kv_heads: 12,
            vocab_size: 50257,
            intermediate_dim: 3072,
            context_length: 2048,
            rope_theta: 10000.0,
            eps: 1e-6,
            tie_word_embeddings: false,
            has_qk_norm: false,
            has_bias: false,
            eos_token_id: None,
        };

        let vram_1k = SafeTensorsCudaModel::estimate_vram_bytes(&cfg, 1024);
        let vram_4k = SafeTensorsCudaModel::estimate_vram_bytes(&cfg, 4096);
        assert!(
            vram_4k > vram_1k,
            "4k context ({}) should use more VRAM than 1k context ({})",
            vram_4k,
            vram_1k
        );
    }

    /// A weight shorter than n*k transposes the available prefix and
    /// zero-fills the remainder.
    #[test]
    fn test_transpose_for_gemm_undersized_weight() {
        let w = vec![1.0, 2.0, 3.0];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 2, 3);

        assert_eq!(t.len(), 6);
        // First (partial) row lands in the first column positions.
        assert_eq!(t[0], 1.0);
        assert_eq!(t[2], 2.0);
        assert_eq!(t[4], 3.0);
        // Missing second row is zero-padded.
        assert_eq!(t[1], 0.0);
        assert_eq!(t[3], 0.0);
        assert_eq!(t[5], 0.0);
    }

    /// An undersized weight whose data spills partway into a second row.
    #[test]
    fn test_transpose_for_gemm_undersized_weight_partial_row() {
        let w = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 2, 4);

        assert_eq!(t.len(), 8);
        // Full first row placed column-wise.
        assert_eq!(t[0], 1.0);
        assert_eq!(t[2], 2.0);
        assert_eq!(t[4], 3.0);
        assert_eq!(t[6], 4.0);
        // One element of the second row present, rest zero-padded.
        assert_eq!(t[1], 5.0);
        assert_eq!(t[3], 0.0);
        assert_eq!(t[5], 0.0);
        assert_eq!(t[7], 0.0);
    }

    /// An empty weight yields a fully zero-filled n*k buffer.
    #[test]
    fn test_transpose_for_gemm_empty_weight() {
        let w: Vec<f32> = vec![];
        let t = SafeTensorsCudaModel::transpose_for_gemm(&w, 3, 2);

        assert_eq!(t.len(), 6);
        assert!(t.iter().all(|&x| x == 0.0));
    }

    /// Exact element placement for a tiny Q/K/V concat (hidden=2, kv=1).
    #[test]
    fn test_concat_qkv_transposed_content_correctness() {
        let q = vec![1.0, 2.0, 3.0, 4.0];
        let k = vec![5.0, 6.0];
        let v = vec![7.0, 8.0];

        let qkv = SafeTensorsCudaModel::concat_qkv_transposed(&q, &k, &v, 2, 1);

        assert_eq!(qkv.len(), 8);
        // Column 0: Q column 0, then K row, then V row.
        assert_eq!(qkv[0], 1.0);
        assert_eq!(qkv[1], 3.0);
        assert_eq!(qkv[2], 5.0);
        assert_eq!(qkv[3], 7.0);
        // Column 1.
        assert_eq!(qkv[4], 2.0);
        assert_eq!(qkv[5], 4.0);
        assert_eq!(qkv[6], 6.0);
        assert_eq!(qkv[7], 8.0);
    }

    /// MHA case (kv_dim == hidden_dim): length formula still holds.
    #[test]
    fn test_concat_qkv_transposed_equal_kv() {
        let hidden_dim = 4;
        let kv_dim = 4;
        let q = vec![1.0f32; hidden_dim * hidden_dim];
        let k = vec![2.0f32; kv_dim * hidden_dim];
        let v = vec![3.0f32; kv_dim * hidden_dim];

        let qkv = SafeTensorsCudaModel::concat_qkv_transposed(&q, &k, &v, hidden_dim, kv_dim);

        let expected_len = hidden_dim * (hidden_dim + 2 * kv_dim);
        assert_eq!(qkv.len(), expected_len);
    }
}