/// Runs one optimized incremental forward step on the configuration returned
/// by `create_large_vocab_config` and checks that the resulting logits contain
/// exactly `vocab_size` entries.
#[test]
fn test_gpu_model_large_vocab_incremental() {
    let config = create_large_vocab_config();
    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("large_vocab_incremental")));

    // The cache dimensions are taken from the model configuration.
    // NOTE(review): 64 is presumably the maximum number of cached positions —
    // confirm against `StreamingKVCache::new`.
    let head_dim = config.head_dim();
    let mut kv_cache = StreamingKVCache::new(config.num_layers, 64, config.num_kv_heads, head_dim);

    let result = model.forward_gpu_incremental_optimized(1, &mut kv_cache);
    assert!(result.is_ok());

    let logits = result.expect("logits");
    assert_eq!(logits.len(), config.vocab_size);
}
/// Builds a four-layer model and checks that a full forward pass over a
/// three-token prompt yields `vocab_size` logits per prompt token.
#[test]
fn test_gpu_model_multi_layer_forward() {
    let config = GpuModelConfig {
        hidden_dim: 32,
        intermediate_dim: 64,
        num_layers: 4,
        num_heads: 2,
        num_kv_heads: 2,
        vocab_size: 50,
        eps: 1e-5,
        rope_theta: 10000.0,
        // Every optional field is left unset for this test.
        layer_types: None,
        linear_key_head_dim: None,
        linear_value_head_dim: None,
        linear_num_key_heads: None,
        linear_num_value_heads: None,
        linear_conv_kernel_dim: None,
        constraints: None,
        num_experts: None,
        num_experts_per_tok: None,
        expert_intermediate_size: None,
    };

    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("multi_layer_forward")));

    let prompt = [1, 2, 3];
    let result = model.forward_gpu(&prompt);
    assert!(result.is_ok());

    // One row of `vocab_size` logits is expected for each prompt token.
    let logits = result.expect("logits");
    assert_eq!(logits.len(), prompt.len() * config.vocab_size);
}
/// Feeds a hidden-state vector through every block of a three-layer model via
/// `forward_block_idx`, checking that each block succeeds and returns a vector
/// of `hidden_dim` elements.
#[test]
fn test_gpu_model_all_blocks_forward_idx() {
    let config = GpuModelConfig {
        hidden_dim: 32,
        intermediate_dim: 64,
        num_layers: 3,
        num_heads: 2,
        num_kv_heads: 2,
        vocab_size: 50,
        eps: 1e-5,
        rope_theta: 10000.0,
        // Every optional field is left unset for this test.
        layer_types: None,
        linear_key_head_dim: None,
        linear_value_head_dim: None,
        linear_num_key_heads: None,
        linear_num_value_heads: None,
        linear_conv_kernel_dim: None,
        constraints: None,
        num_experts: None,
        num_experts_per_tok: None,
        expert_intermediate_size: None,
    };

    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("all_blocks")));

    // Thread the hidden state through the blocks in order: the output of one
    // block becomes the input of the next.
    let initial_hidden = vec![0.1f32; config.hidden_dim];
    let _last_hidden = (0..config.num_layers).fold(initial_hidden, |hidden, block_idx| {
        let result = model.forward_block_idx(&hidden, 1, block_idx);
        assert!(result.is_ok(), "Block {} should succeed", block_idx);

        let next_hidden = result.expect("expected value");
        assert_eq!(next_hidden.len(), config.hidden_dim);
        next_hidden
    });
}
/// Configures 16 attention heads sharing 4 KV heads (four query heads per KV
/// head) and checks that a single-token forward pass succeeds.
#[test]
fn test_gpu_model_gqa_multiple_q_per_kv() {
    let config = GpuModelConfig {
        hidden_dim: 64,
        intermediate_dim: 128,
        num_layers: 1,
        // More query heads than KV heads: 16 / 4 = 4 query heads per KV head.
        num_heads: 16,
        num_kv_heads: 4,
        vocab_size: 50,
        eps: 1e-5,
        rope_theta: 10000.0,
        // Every optional field is left unset for this test.
        layer_types: None,
        linear_key_head_dim: None,
        linear_value_head_dim: None,
        linear_num_key_heads: None,
        linear_num_value_heads: None,
        linear_conv_kernel_dim: None,
        constraints: None,
        num_experts: None,
        num_experts_per_tok: None,
        expert_intermediate_size: None,
    };

    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("gqa_multiple")));

    let result = model.forward_gpu(&[1]);
    assert!(result.is_ok());
}
/// Runs a three-token cached forward pass on the configuration returned by
/// `create_gqa_config`, then checks that one further optimized incremental
/// step using the same cache succeeds.
#[test]
fn test_gpu_model_gqa_incremental_with_cache() {
    let config = create_gqa_config();
    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("gqa_cache")));

    let head_dim = config.head_dim();
    let mut kv_cache = StreamingKVCache::new(config.num_layers, 128, config.num_kv_heads, head_dim);

    // The outcome of the first pass is deliberately discarded; only the
    // incremental step below is asserted on.
    // NOTE(review): consider asserting that this call succeeds as well.
    let prompt = [1, 2, 3];
    let _ = model.forward_gpu_with_cache(&prompt, &mut kv_cache);

    let result = model.forward_gpu_incremental_optimized(4, &mut kv_cache);
    assert!(result.is_ok());
}
/// Checks that `generate` reports an error when it is given an empty prompt.
#[test]
fn test_gpu_model_empty_generate() {
    let mut model = GpuModel::new(create_minimal_config()).expect("model");

    // NOTE(review): the argument 5 is presumably the token budget — confirm
    // against `GpuGenerateConfig::deterministic`.
    let generation_settings = GpuGenerateConfig::deterministic(5);

    let outcome = model.generate(&[], &generation_settings);
    assert!(outcome.is_err(), "Generate with empty prompt should fail");
}
/// Probes both sides of the vocabulary boundary: token `vocab_size - 1` must
/// be accepted by `forward_gpu`, and token `vocab_size` must be rejected.
#[test]
fn test_gpu_model_token_at_vocab_boundary() {
    let config = create_minimal_config();
    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("boundary")));

    let last_valid_token = config.vocab_size - 1;
    let first_invalid_token = config.vocab_size;

    // Highest token id that is still inside the vocabulary.
    let result = model.forward_gpu(&[last_valid_token]);
    assert!(result.is_ok());

    // One past the end of the vocabulary.
    let result = model.forward_gpu(&[first_invalid_token]);
    assert!(result.is_err());
}
/// Checks that each listed `WeightType` variant can be duplicated by plain
/// assignment and that the duplicate is the same variant as the original.
#[test]
fn test_weight_type_copy_semantics() {
    for wt in [
        WeightType::Qkv,
        WeightType::Output,
        WeightType::FfnFc1,
        WeightType::FfnFc2,
        WeightType::LmHead,
    ] {
        // `wt` is used again after this assignment, which compiles only
        // because `WeightType` is `Copy`.
        let copied = wt;
        assert!(matches!(
            (wt, copied),
            (WeightType::Qkv, WeightType::Qkv)
                | (WeightType::Output, WeightType::Output)
                | (WeightType::FfnFc1, WeightType::FfnFc1)
                | (WeightType::FfnFc2, WeightType::FfnFc2)
                | (WeightType::LmHead, WeightType::LmHead)
        ));
    }
}
/// Checks that the block weights produced by `create_block_weights_with_swiglu`
/// have every weight and bias buffer populated and carry a gate weight.
#[test]
fn test_block_weights_all_fields() {
    let minimal_config = create_minimal_config();
    let block = create_block_weights_with_swiglu(&minimal_config);

    // Attention: norm, QKV projection, output projection.
    assert!(!block.attn_norm_weight.is_empty());
    assert!(!block.attn_norm_bias.is_empty());
    assert!(!block.qkv_weight.is_empty());
    assert!(!block.qkv_bias.is_empty());
    assert!(!block.out_weight.is_empty());
    assert!(!block.out_bias.is_empty());

    // Feed-forward: norm and both fully connected layers.
    assert!(!block.ffn_norm_weight.is_empty());
    assert!(!block.ffn_norm_bias.is_empty());
    assert!(!block.ffn_fc1_weight.is_empty());
    assert!(!block.ffn_fc1_bias.is_empty());
    assert!(!block.ffn_fc2_weight.is_empty());
    assert!(!block.ffn_fc2_bias.is_empty());

    // The gate weight is optional on the struct and must be present here.
    assert!(block.ffn_gate_weight.is_some());
}
/// Builds a single-layer model with `rope_theta` set to 500000.0 (the other
/// explicit configurations in this file use 10000.0) and checks that one
/// optimized incremental forward step succeeds.
#[test]
fn test_gpu_model_custom_rope_theta() {
    let config = GpuModelConfig {
        hidden_dim: 32,
        intermediate_dim: 64,
        num_layers: 1,
        num_heads: 2,
        num_kv_heads: 2,
        vocab_size: 50,
        eps: 1e-5,
        rope_theta: 500000.0,
        // A struct literal without `..base` must name every field, so the
        // optional fields are spelled out as `None`, matching the other
        // `GpuModelConfig` literals in this file.
        layer_types: None,
        linear_key_head_dim: None,
        linear_value_head_dim: None,
        linear_num_key_heads: None,
        linear_num_value_heads: None,
        linear_conv_kernel_dim: None,
        constraints: None,
        num_experts: None,
        num_experts_per_tok: None,
        expert_intermediate_size: None,
    };
    let mut model = GpuModel::new(config.clone()).expect("model");
    let mock = MockExecutor::new("custom_rope");
    model.with_test_executor(Box::new(mock));

    // The cache dimensions are taken from the model configuration.
    let mut kv_cache = StreamingKVCache::new(
        config.num_layers,
        64,
        config.num_kv_heads,
        config.head_dim(),
    );

    let result = model.forward_gpu_incremental_optimized(1, &mut kv_cache);
    assert!(result.is_ok());
}
/// Builds a fresh single-layer model for each of several `eps` values and
/// checks that a single-token forward pass succeeds with every one of them.
#[test]
fn test_gpu_model_different_eps() {
    for eps in [1e-5, 1e-6, 1e-8] {
        let config = GpuModelConfig {
            hidden_dim: 32,
            intermediate_dim: 64,
            num_layers: 1,
            num_heads: 2,
            num_kv_heads: 2,
            vocab_size: 50,
            eps,
            rope_theta: 10000.0,
            // Every optional field is left unset for this test.
            layer_types: None,
            linear_key_head_dim: None,
            linear_value_head_dim: None,
            linear_num_key_heads: None,
            linear_num_value_heads: None,
            linear_conv_kernel_dim: None,
            constraints: None,
            num_experts: None,
            num_experts_per_tok: None,
            expert_intermediate_size: None,
        };

        let mut model = GpuModel::new(config.clone()).expect("model");
        // Each iteration gets its own mock executor, named after the eps value.
        model.with_test_executor(Box::new(MockExecutor::new(&format!("eps_{}", eps))));

        let outcome = model.forward_gpu(&[1]);
        assert!(outcome.is_ok(), "Forward with eps={} should succeed", eps);
    }
}
/// Performs ten consecutive optimized incremental forward steps against one
/// shared KV cache and checks that every step succeeds.
#[test]
fn test_gpu_model_sequential_incremental_forwards() {
    let config = create_minimal_config();
    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("sequential")));

    let head_dim = config.head_dim();
    let mut kv_cache = StreamingKVCache::new(config.num_layers, 128, config.num_kv_heads, head_dim);

    for step in 0..10 {
        // Reduce modulo the vocabulary size so the token id is always in range.
        let token = step % config.vocab_size;
        let outcome = model.forward_gpu_incremental_optimized(token, &mut kv_cache);
        assert!(outcome.is_ok(), "Token {} should succeed", step);
    }
}
/// Passes a hidden-state vector through every block of a three-layer model via
/// `forward_block_incremental_optimized`, sharing one KV cache, and checks that
/// each block succeeds.
#[test]
fn test_gpu_model_block_incremental_all_layers() {
    let config = GpuModelConfig {
        hidden_dim: 32,
        intermediate_dim: 64,
        num_layers: 3,
        num_heads: 2,
        num_kv_heads: 2,
        vocab_size: 50,
        eps: 1e-5,
        rope_theta: 10000.0,
        // Every optional field is left unset for this test.
        layer_types: None,
        linear_key_head_dim: None,
        linear_value_head_dim: None,
        linear_num_key_heads: None,
        linear_num_value_heads: None,
        linear_conv_kernel_dim: None,
        constraints: None,
        num_experts: None,
        num_experts_per_tok: None,
        expert_intermediate_size: None,
    };

    let mut model = GpuModel::new(config.clone()).expect("model");
    model.with_test_executor(Box::new(MockExecutor::new("block_all_layers")));

    let head_dim = config.head_dim();
    let mut kv_cache = StreamingKVCache::new(config.num_layers, 64, config.num_kv_heads, head_dim);

    // The output of each block becomes the input of the next one.
    let mut hidden = vec![0.1f32; config.hidden_dim];
    for layer in 0..config.num_layers {
        let step = model.forward_block_incremental_optimized(&hidden, layer, &mut kv_cache);
        assert!(step.is_ok(), "Block {} incremental should succeed", layer);
        hidden = step.expect("expected value");
    }
}