use super::{AprQ4ToGpuAdapter, GpuModelQ4, LayerNorms};
use crate::apr_transformer::{
AprTransformerConfig, QuantizedAprLayerQ4, QuantizedAprTensorQ4, QuantizedAprTransformerQ4,
};
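// Unit tests for the APR Q4 → GPU adapter: activation functions (SiLU/GELU),
// RMSNorm, RoPE application, CPU attention (including GQA), and model
// construction from a quantized APR transformer.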
#[test]
fn test_silu_values() {
use crate::gpu::adapters::apr_q4::silu;
assert!((silu(0.0) - 0.0).abs() < 1e-6);
let silu_1 = silu(1.0);
assert!(silu_1 > 0.7 && silu_1 < 0.8, "silu(1) = {silu_1}");
let silu_2 = silu(2.0);
assert!(silu_2 > 1.7 && silu_2 < 1.8, "silu(2) = {silu_2}");
let silu_neg1 = silu(-1.0);
assert!(
silu_neg1 > -0.3 && silu_neg1 < -0.2,
"silu(-1) = {silu_neg1}"
);
let silu_10 = silu(10.0);
assert!(silu_10 > 9.9, "silu(10) = {silu_10}");
let silu_neg10 = silu(-10.0);
assert!(silu_neg10.abs() < 0.001, "silu(-10) = {silu_neg10}");
}
#[test]
fn test_gelu_values() {
use crate::gpu::adapters::apr_q4::gelu;
assert!((gelu(0.0) - 0.0).abs() < 1e-6);
let gelu_1 = gelu(1.0);
assert!(gelu_1 > 0.8 && gelu_1 < 0.9, "gelu(1) = {gelu_1}");
let gelu_2 = gelu(2.0);
assert!(gelu_2 > 1.9 && gelu_2 < 2.0, "gelu(2) = {gelu_2}");
let gelu_neg1 = gelu(-1.0);
assert!(
gelu_neg1 > -0.2 && gelu_neg1 < -0.1,
"gelu(-1) = {gelu_neg1}"
);
let gelu_10 = gelu(10.0);
assert!(gelu_10 > 9.9, "gelu(10) = {gelu_10}");
let gelu_neg10 = gelu(-10.0);
assert!(gelu_neg10.abs() < 0.001, "gelu(-10) = {gelu_neg10}");
}
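// A self-contained sketch of the activation math the range assertions above
// encode, assuming the standard definitions silu(x) = x * sigmoid(x) and the
// tanh approximation of GELU. The local helpers below are illustrative only
// and are independent of the crate's `silu`/`gelu`.
#[test]
fn test_activation_reference_sketch() {
    fn sigmoid(x: f32) -> f32 {
        1.0 / (1.0 + (-x).exp())
    }
    let silu_ref = |x: f32| x * sigmoid(x);
    let gelu_ref = |x: f32| {
        // 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        0.5 * x
            * (1.0
                + ((2.0 / std::f32::consts::PI).sqrt() * (x + 0.044_715 * x * x * x)).tanh())
    };
    // Spot-check the constants the range assertions above bracket.
    assert!((silu_ref(1.0) - 0.731_058_6).abs() < 1e-4);
    assert!((silu_ref(2.0) - 1.761_594).abs() < 1e-4);
    assert!((gelu_ref(1.0) - 0.841_192).abs() < 1e-3);
}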
#[test]
fn test_rms_norm_uniform_weights() {
let model = create_tiny_model();
let mut x = vec![1.0, 2.0, 3.0, 4.0];
let weight = vec![1.0, 1.0, 1.0, 1.0];
model.rms_norm_inplace(&mut x, &weight);
    // Expected: x_i / sqrt(mean(x^2) + eps); eps = 1e-5 is assumed to match the model's epsilon.
    let rms = ((1.0 + 4.0 + 9.0 + 16.0) / 4.0 + 1e-5_f32).sqrt();
let scale = 1.0 / rms;
assert!((x[0] - 1.0 * scale).abs() < 1e-5);
assert!((x[1] - 2.0 * scale).abs() < 1e-5);
assert!((x[2] - 3.0 * scale).abs() < 1e-5);
assert!((x[3] - 4.0 * scale).abs() < 1e-5);
}
#[test]
fn test_rms_norm_weighted() {
let model = create_tiny_model();
let mut x = vec![2.0, 2.0, 2.0, 2.0];
let weight = vec![0.5, 1.0, 1.5, 2.0];
model.rms_norm_inplace(&mut x, &weight);
let rms = (16.0 / 4.0 + 1e-5_f32).sqrt();
let scale = 1.0 / rms;
assert!((x[0] - 2.0 * scale * 0.5).abs() < 1e-5);
assert!((x[1] - 2.0 * scale * 1.0).abs() < 1e-5);
assert!((x[2] - 2.0 * scale * 1.5).abs() < 1e-5);
assert!((x[3] - 2.0 * scale * 2.0).abs() < 1e-5);
}
#[test]
fn test_rms_norm_zeros() {
let model = create_tiny_model();
let mut x = vec![0.0, 0.0, 0.0, 0.0];
let weight = vec![1.0, 1.0, 1.0, 1.0];
model.rms_norm_inplace(&mut x, &weight);
for v in &x {
assert!(v.abs() < 1e-5);
}
}
#[test]
fn test_rms_norm_negative() {
let model = create_tiny_model();
let mut x = vec![-1.0, -2.0, -3.0, -4.0];
let weight = vec![1.0, 1.0, 1.0, 1.0];
model.rms_norm_inplace(&mut x, &weight);
assert!(x[0] < 0.0);
assert!(x[1] < 0.0);
assert!(x[2] < 0.0);
assert!(x[3] < 0.0);
}
#[test]
fn test_rms_norm_short_weight() {
let model = create_tiny_model();
let mut x = vec![1.0, 2.0, 3.0, 4.0];
    // Weight shorter than x: elements past the weight length are expected to scale by 1.0.
    let weight = vec![2.0, 2.0];
model.rms_norm_inplace(&mut x, &weight);
let rms = ((1.0 + 4.0 + 9.0 + 16.0) / 4.0 + 1e-5_f32).sqrt();
let scale = 1.0 / rms;
assert!((x[0] - 1.0 * scale * 2.0).abs() < 1e-5);
assert!((x[1] - 2.0 * scale * 2.0).abs() < 1e-5);
assert!((x[2] - 3.0 * scale * 1.0).abs() < 1e-5);
assert!((x[3] - 4.0 * scale * 1.0).abs() < 1e-5);
}
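// A minimal self-contained sketch of the RMSNorm formula the tests above
// verify: y_i = x_i * w_i / sqrt(mean(x^2) + eps). The eps of 1e-5 mirrors the
// value hardcoded in the expectations above and is assumed to match the
// model's configured epsilon.
#[test]
fn test_rms_norm_formula_sketch() {
    let x = [1.0_f32, 2.0, 3.0, 4.0];
    let eps = 1e-5_f32;
    let mean_sq = x.iter().map(|v| v * v).sum::<f32>() / x.len() as f32;
    let rms = (mean_sq + eps).sqrt();
    // mean(x^2) = 30 / 4 = 7.5, so rms = sqrt(7.5) ≈ 2.7386.
    assert!((rms - 2.738_613).abs() < 1e-3);
    // With unit weights, each normalized element is x_i / rms.
    assert!((x[0] / rms - 0.365_148).abs() < 1e-3);
}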
#[test]
fn test_rope_position_zero() {
    let model = create_model_for_rope(16, 2, 2);
    let head_dim = 16 / 2;
    let kv_dim = 2 * head_dim;
    let qkv_dim = 16 + 2 * kv_dim;
let mut qkv = vec![1.0_f32; qkv_dim];
model.apply_rope_to_qkv(&mut qkv, 1, 16, 2, 2);
for (i, &v) in qkv.iter().enumerate().take(16) {
assert!(
(v - 1.0).abs() < 1e-5,
"Q[{i}] changed at pos 0: {v} != 1.0"
);
}
}
#[test]
fn test_rope_position_one() {
    let model = create_model_for_rope(8, 2, 2);
    let head_dim = 4;
    let kv_dim = 2 * head_dim;
    let qkv_dim = 8 + 2 * kv_dim;
let mut qkv = vec![1.0_f32; qkv_dim * 2];
model.apply_rope_to_qkv(&mut qkv, 2, 8, 2, 2);
    let pos1_start = qkv_dim;
    let pos1_changed = qkv[pos1_start..pos1_start + qkv_dim]
.iter()
.any(|&v| (v - 1.0).abs() > 0.01);
assert!(pos1_changed, "RoPE should modify values at position 1");
}
#[test]
fn test_rope_qk_structure() {
let model = create_model_for_rope(8, 2, 2);
    let kv_dim = 8;
    let qkv_dim = 8 + 2 * kv_dim;
let mut qkv: Vec<f32> = (0..qkv_dim).map(|i| i as f32).collect();
    let original_v = qkv[16..24].to_vec(); // V occupies [hidden_dim + kv_dim..] = [16..24]
model.apply_rope_to_qkv(&mut qkv, 1, 8, 2, 2);
for (i, (&orig, &new)) in original_v.iter().zip(qkv[16..24].iter()).enumerate() {
assert!(
(orig - new).abs() < 1e-6,
"V[{i}] was modified: {orig} -> {new}"
);
}
}
#[test]
fn test_apply_rope_inplace_single_head() {
let model = create_model_for_rope(4, 1, 1);
let mut x = vec![1.0, 2.0, 3.0, 4.0];
model.apply_rope_inplace(&mut x, 0, 1, 4, 10000.0);
    // Position 0 is the identity rotation, so every element should be unchanged.
    assert!((x[0] - 1.0).abs() < 1e-5);
    assert!((x[1] - 2.0).abs() < 1e-5);
    assert!((x[2] - 3.0).abs() < 1e-5);
    assert!((x[3] - 4.0).abs() < 1e-5);
}
#[test]
fn test_apply_rope_bounds_check() {
let model = create_model_for_rope(4, 1, 1);
let mut x = vec![1.0, 2.0];
model.apply_rope_inplace(&mut x, 1, 1, 4, 10000.0);
assert!(x[0].is_finite());
assert!(x[1].is_finite());
}
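// A self-contained sketch of the rotation RoPE applies per (even, odd) pair
// within a head, assuming the common convention freq_i = theta^(-2i/head_dim)
// and angle = pos * freq_i. At pos = 0 the rotation is the identity, which is
// what test_rope_position_zero relies on; at pos > 0 values change, which is
// what test_rope_position_one relies on.
#[test]
fn test_rope_rotation_sketch() {
    let (x0, x1) = (1.0_f32, 1.0_f32);
    let rotate = |pos: f32, freq: f32| -> (f32, f32) {
        let (sin, cos) = (pos * freq).sin_cos();
        (x0 * cos - x1 * sin, x0 * sin + x1 * cos)
    };
    // Position 0: identity rotation.
    let (r0, r1) = rotate(0.0, 1.0);
    assert!((r0 - x0).abs() < 1e-6 && (r1 - x1).abs() < 1e-6);
    // Position 1, lowest-index pair (freq = 1.0): values move.
    let (r0, r1) = rotate(1.0, 1.0);
    assert!((r0 - x0).abs() > 0.01 || (r1 - x1).abs() > 0.01);
}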
#[test]
fn test_attention_single_token_returns_v() {
let model = create_model_for_attention(8, 2, 2);
let qkv = vec![
        1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, // Q (hidden_dim = 8)
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, // K (kv_dim = 8)
        0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, // V (kv_dim = 8)
];
let out = model.attention_cpu(&qkv, 1, 8, 2, 2);
assert_eq!(out.len(), 8);
for &v in &out {
assert!((v - 0.9).abs() < 1e-6);
}
}
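// Why single-token attention returns V exactly, as asserted above: with one
// key in scope, softmax over a single score is 1.0 regardless of the Q·K
// value, so the weighted sum of values degenerates to V itself. A minimal
// sketch of that degenerate softmax:
#[test]
fn test_softmax_single_element_sketch() {
    let score = 3.5_f32; // an arbitrary q·k / sqrt(head_dim) value
    let weight = (score - score).exp(); // max-subtracted softmax over one element
    assert!((weight - 1.0).abs() < 1e-6);
}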
#[test]
fn test_attention_gqa() {
let model = create_model_for_attention(8, 4, 2);
    let head_dim = 8 / 4;
    let _kv_dim = 2 * head_dim;
let mut qkv = vec![0.0; 8 + 4 + 4];
qkv[0..8].fill(1.0);
qkv[8..12].fill(0.5);
qkv[12] = 0.1;
qkv[13] = 0.2;
qkv[14] = 0.3;
qkv[15] = 0.4;
let out = model.attention_cpu(&qkv, 1, 8, 4, 2);
assert_eq!(out.len(), 8);
assert!((out[0] - 0.1).abs() < 1e-6);
assert!((out[1] - 0.2).abs() < 1e-6);
assert!((out[2] - 0.1).abs() < 1e-6);
assert!((out[3] - 0.2).abs() < 1e-6);
assert!((out[4] - 0.3).abs() < 1e-6);
assert!((out[5] - 0.4).abs() < 1e-6);
assert!((out[6] - 0.3).abs() < 1e-6);
assert!((out[7] - 0.4).abs() < 1e-6);
}
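// A sketch of the GQA query→KV head mapping assumed by the expectations
// above: with 4 query heads sharing 2 KV heads, query head h reads KV head
// h / (num_heads / num_kv_heads), so heads 0,1 see V = [0.1, 0.2] and heads
// 2,3 see V = [0.3, 0.4], producing the interleaved output pattern asserted.
#[test]
fn test_gqa_head_mapping_sketch() {
    let (num_heads, num_kv_heads) = (4usize, 2usize);
    let group = num_heads / num_kv_heads;
    let mapping: Vec<usize> = (0..num_heads).map(|h| h / group).collect();
    assert_eq!(mapping, vec![0, 0, 1, 1]);
}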
#[test]
fn test_attention_multi_token() {
let model = create_model_for_attention(8, 2, 2);
    // Two tokens' worth of QKV (qkv_dim = 24 per token), each laid out [Q | K | V].
    let qkv = vec![
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // token 0: Q
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, // token 0: K
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, // token 0: V
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // token 1: Q
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, // token 1: K
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, // token 1: V
    ];
let out = model.attention_cpu(&qkv, 2, 8, 2, 2);
assert_eq!(out.len(), 8);
}
#[test]
fn test_create_model_full() {
let apr = create_test_apr_model(true, true);
let model = AprQ4ToGpuAdapter::create_model(&apr);
assert_eq!(model.config.hidden_dim, 64);
assert_eq!(model.config.vocab_size, 100);
assert_eq!(model.num_layers, 2);
assert!(model.has_gate);
assert_eq!(model.layer_norms.len(), 2);
assert_eq!(model.token_embedding.len(), 64 * 100);
assert_eq!(model.output_norm_weight.len(), 64);
}
#[test]
fn test_create_model_no_gate() {
let apr = create_test_apr_model(false, true);
let model = AprQ4ToGpuAdapter::create_model(&apr);
assert!(!model.has_gate);
}
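// When the APR model carries no FFN norm weights, the adapter is expected to
// fall back to unit weights (an identity RMSNorm scale), which the next test
// verifies.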
#[test]
fn test_create_model_default_ffn_norm() {
let apr = create_test_apr_model(true, false);
let model = AprQ4ToGpuAdapter::create_model(&apr);
for layer_norm in &model.layer_norms {
assert_eq!(layer_norm.ffn_norm.len(), 64);
for &w in &layer_norm.ffn_norm {
assert!((w - 1.0).abs() < 1e-6);
}
}
}
#[test]
fn test_create_model_no_layers() {
let apr = QuantizedAprTransformerQ4 {
config: AprTransformerConfig {
architecture: "test".to_string(),
hidden_dim: 64,
num_layers: 0,
num_heads: 4,
num_kv_heads: 4,
vocab_size: 100,
intermediate_dim: 128,
context_length: 256,
rope_theta: 10000.0,
eps: 1e-5,
eos_token_id: None,
..Default::default()
},
token_embedding: vec![0.1; 64 * 100],
layers: vec![],
output_norm_weight: vec![1.0; 64],
lm_head_weight: QuantizedAprTensorQ4::zeros(64, 100),
};
let model = AprQ4ToGpuAdapter::create_model(&apr);
assert_eq!(model.num_layers, 0);
    assert!(!model.has_gate);
    assert!(model.layer_norms.is_empty());
}
include!("layer_norms_tests.rs");