use crate::error::RealizarError;
use crate::gguf::config::GGUFConfig;
use crate::gguf::model::OwnedQuantizedModel;
use crate::gguf::quantized::{OwnedQKVWeights, OwnedQuantizedTensor};
use crate::gguf::types::{
GGUF_TYPE_BF16, GGUF_TYPE_F16, GGUF_TYPE_F32, GGUF_TYPE_Q4_0, GGUF_TYPE_Q4_K, GGUF_TYPE_Q8_0,
};
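/// Builds a minimal single-layer test `GGUFConfig`: architecture "test",
/// 4 heads / 4 KV heads, a 512-token context window, and an intermediate
/// dim of `4 * hidden_dim`.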
fn test_config(hidden_dim: usize, vocab_size: usize) -> GGUFConfig {
GGUFConfig {
architecture: "test".to_string(),
constraints: crate::gguf::ArchConstraints::from_architecture("test"),
hidden_dim,
num_layers: 1,
num_heads: 4,
num_kv_heads: 4,
vocab_size,
intermediate_dim: hidden_dim * 4,
context_length: 512,
rope_theta: 10000.0,
eps: 1e-5,
rope_type: 0,
explicit_head_dim: None,
bos_token_id: None,
eos_token_id: None,
}
}
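/// Builds a Q4_0 tensor in which every 32-element block (18 bytes: a 2-byte
/// f16 scale followed by 16 bytes of packed nibbles) has its scale set to
/// 1.0 (0x3C00) and all quantized values left at zero.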
fn create_q4_0_test_data(in_dim: usize, out_dim: usize) -> OwnedQuantizedTensor {
const Q4_0_BLOCK_SIZE: usize = 32;
const Q4_0_BLOCK_BYTES: usize = 18;
let blocks_per_row = in_dim.div_ceil(Q4_0_BLOCK_SIZE);
let bytes_per_row = blocks_per_row * Q4_0_BLOCK_BYTES;
let data_size = out_dim * bytes_per_row;
let mut data = vec![0u8; data_size];
for row in 0..out_dim {
for block in 0..blocks_per_row {
let offset = row * bytes_per_row + block * Q4_0_BLOCK_BYTES;
data[offset..offset + 2].copy_from_slice(&0x3C00_u16.to_le_bytes());
}
}
OwnedQuantizedTensor {
data,
in_dim,
out_dim,
qtype: GGUF_TYPE_Q4_0,
}
}
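/// Builds a Q8_0 tensor in which every 32-element block (34 bytes: a 2-byte
/// f16 scale followed by 32 int8 quants) has its scale set to 1.0 and all
/// quants left at zero.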
fn create_q8_0_test_data(in_dim: usize, out_dim: usize) -> OwnedQuantizedTensor {
const Q8_0_BLOCK_SIZE: usize = 32;
const Q8_0_BLOCK_BYTES: usize = 34;
let blocks_per_row = in_dim.div_ceil(Q8_0_BLOCK_SIZE);
let bytes_per_row = blocks_per_row * Q8_0_BLOCK_BYTES;
let data_size = out_dim * bytes_per_row;
let mut data = vec![0u8; data_size];
for row in 0..out_dim {
for block in 0..blocks_per_row {
let offset = row * bytes_per_row + block * Q8_0_BLOCK_BYTES;
data[offset..offset + 2].copy_from_slice(&0x3C00_u16.to_le_bytes());
}
}
OwnedQuantizedTensor {
data,
in_dim,
out_dim,
qtype: GGUF_TYPE_Q8_0,
}
}
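/// Builds a Q4_K tensor in which every 256-element super-block (144 bytes)
/// has d = 1.0 (0x3C00), dmin = 0.0, and zeroed scales and quants.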
fn create_q4k_test_data(in_dim: usize, out_dim: usize) -> OwnedQuantizedTensor {
const Q4K_SUPER_BLOCK_SIZE: usize = 256;
const Q4K_SUPER_BLOCK_BYTES: usize = 144;
let super_blocks_per_row = in_dim.div_ceil(Q4K_SUPER_BLOCK_SIZE);
let bytes_per_row = super_blocks_per_row * Q4K_SUPER_BLOCK_BYTES;
let data_size = out_dim * bytes_per_row;
let mut data = vec![0u8; data_size];
for row in 0..out_dim {
for sb in 0..super_blocks_per_row {
let offset = row * bytes_per_row + sb * Q4K_SUPER_BLOCK_BYTES;
data[offset..offset + 2].copy_from_slice(&0x3C00_u16.to_le_bytes());
data[offset + 2..offset + 4].copy_from_slice(&0x0000_u16.to_le_bytes());
}
}
OwnedQuantizedTensor {
data,
in_dim,
out_dim,
qtype: GGUF_TYPE_Q4_K,
}
}
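/// Assembles a single-layer `OwnedQuantizedModel`: Q4_K weights, a fused QKV
/// projection, unit norm weights, and a constant 0.1 token-embedding table.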
fn create_test_model(hidden_dim: usize, vocab_size: usize) -> OwnedQuantizedModel {
let config = test_config(hidden_dim, vocab_size);
let intermediate_dim = config.intermediate_dim;
let qkv_weight = create_q4k_test_data(hidden_dim, 3 * hidden_dim);
let attn_output_weight = create_q4k_test_data(hidden_dim, hidden_dim);
let ffn_up_weight = create_q4k_test_data(hidden_dim, intermediate_dim);
let ffn_down_weight = create_q4k_test_data(intermediate_dim, hidden_dim);
let layer = crate::gguf::OwnedQuantizedLayer {
attn_norm_weight: vec![1.0f32; hidden_dim],
attn_norm_bias: None,
qkv_weight: OwnedQKVWeights::Fused(qkv_weight),
qkv_bias: None,
attn_output_weight,
attn_output_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_gate_weight: None,
ffn_gate_bias: None,
ffn_norm_weight: Some(vec![1.0f32; hidden_dim]),
ffn_norm_bias: None,
attn_q_norm_weight: None,
attn_k_norm_weight: None,
};
let lm_head_weight = create_q4k_test_data(hidden_dim, vocab_size);
OwnedQuantizedModel {
config,
token_embedding: vec![0.1f32; vocab_size * hidden_dim],
position_embedding: None,
layers: vec![layer],
encoder_layers: vec![],
encoder_output_norm_weight: None,
encoder_output_norm_bias: None,
output_norm_weight: vec![1.0f32; hidden_dim],
output_norm_bias: None,
lm_head_weight,
lm_head_bias: None,
#[cfg(feature = "cuda")]
cuda_executor: None,
#[cfg(feature = "cuda")]
cuda_kernel_count: std::sync::atomic::AtomicU64::new(0),
#[cfg(feature = "cuda")]
cached_weight_names: std::sync::Mutex::new(std::collections::HashSet::new()),
}
}
#[test]
fn test_embed_single_token() {
let model = create_test_model(64, 100);
let embeddings = model.embed(&[0]);
assert_eq!(embeddings.len(), 64);
assert!(embeddings.iter().all(|&x| (x - 0.1).abs() < f32::EPSILON));
}
#[test]
fn test_embed_multiple_tokens() {
let model = create_test_model(64, 100);
let embeddings = model.embed(&[0, 1, 2]);
assert_eq!(embeddings.len(), 64 * 3);
}
#[test]
fn test_embed_out_of_bounds_token() {
let model = create_test_model(64, 100);
let embeddings = model.embed(&[1000]);
assert_eq!(embeddings.len(), 64);
assert!(embeddings.iter().all(|&x| x == 0.0));
}
#[test]
fn test_embed_empty_tokens() {
let model = create_test_model(64, 100);
let embeddings = model.embed(&[]);
assert!(embeddings.is_empty());
}
#[test]
fn test_embed_boundary_token() {
let model = create_test_model(64, 100);
let embeddings = model.embed(&[99]);
assert_eq!(embeddings.len(), 64);
assert!(embeddings.iter().all(|&x| (x - 0.1).abs() < f32::EPSILON));
}
#[test]
fn test_embed_into_valid() {
let model = create_test_model(64, 100);
let mut output = vec![0.0f32; 64];
model.embed_into(0, &mut output);
assert!(output.iter().all(|&x| (x - 0.1).abs() < f32::EPSILON));
}
#[test]
fn test_embed_into_out_of_bounds() {
let model = create_test_model(64, 100);
let mut output = vec![1.0f32; 64];
model.embed_into(1000, &mut output);
assert!(output.iter().all(|&x| x == 0.0));
}
#[test]
fn test_fused_matmul_q4_0_single_token() {
let model = create_test_model(64, 100);
let weight = create_q4_0_test_data(64, 32);
let input = vec![1.0f32; 64];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 32);
}
#[test]
fn test_fused_matmul_q4_0_multi_token() {
let model = create_test_model(64, 100);
let weight = create_q4_0_test_data(64, 32);
let input = vec![1.0f32; 64 * 3];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 32 * 3);
}
#[test]
fn test_fused_matmul_q8_0_single_token() {
let model = create_test_model(64, 100);
let weight = create_q8_0_test_data(64, 32);
let input = vec![1.0f32; 64];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 32);
}
#[test]
fn test_fused_matmul_q8_0_multi_token() {
let model = create_test_model(64, 100);
let weight = create_q8_0_test_data(64, 32);
let input = vec![1.0f32; 64 * 2];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 32 * 2);
}
#[test]
fn test_fused_matmul_q4k_single_token() {
let model = create_test_model(256, 100);
let weight = create_q4k_test_data(256, 64);
let input = vec![1.0f32; 256];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 64);
}
#[test]
fn test_fused_matmul_q4k_multi_token() {
let model = create_test_model(256, 100);
let weight = create_q4k_test_data(256, 64);
let input = vec![1.0f32; 256 * 4];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 64 * 4);
}
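/// Builds a BF16 tensor in which every element is `value` truncated to
/// bfloat16 (the upper 16 bits of its f32 bit pattern).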
fn create_bf16_test_data(in_dim: usize, out_dim: usize, value: f32) -> OwnedQuantizedTensor {
let bf16_bits = (value.to_bits() >> 16) as u16;
let mut data = Vec::with_capacity(out_dim * in_dim * 2);
for _ in 0..(out_dim * in_dim) {
data.extend_from_slice(&bf16_bits.to_le_bytes());
}
OwnedQuantizedTensor {
data,
in_dim,
out_dim,
qtype: GGUF_TYPE_BF16,
}
}
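/// Builds a `dim x dim` BF16 identity matrix: 1.0 on the diagonal, 0.0 elsewhere.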
fn create_bf16_identity(dim: usize) -> OwnedQuantizedTensor {
let one_bf16 = (1.0_f32.to_bits() >> 16) as u16;
let zero_bf16 = (0.0_f32.to_bits() >> 16) as u16;
let mut data = Vec::with_capacity(dim * dim * 2);
for row in 0..dim {
for col in 0..dim {
let bits = if row == col { one_bf16 } else { zero_bf16 };
data.extend_from_slice(&bits.to_le_bytes());
}
}
OwnedQuantizedTensor {
data,
in_dim: dim,
out_dim: dim,
qtype: GGUF_TYPE_BF16,
}
}
#[test]
fn falsify_bf16_001_shape_preservation_single_token() {
let model = create_test_model(64, 100);
let weight = create_bf16_test_data(64, 32, 0.0);
let input = vec![1.0f32; 64];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok(), "BF16 matmul must not error");
assert_eq!(result.unwrap().len(), 32, "Output shape must be out_dim");
}
#[test]
fn falsify_bf16_002_shape_preservation_multi_token() {
let model = create_test_model(64, 100);
let weight = create_bf16_test_data(64, 32, 0.0);
let input = vec![1.0f32; 64 * 3];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_ok());
assert_eq!(
result.unwrap().len(),
32 * 3,
"Output shape must be out_dim * seq_len"
);
}
#[test]
fn falsify_bf16_003_zero_weights_produce_zero_output() {
let model = create_test_model(64, 100);
let weight = create_bf16_test_data(64, 32, 0.0);
let input = vec![42.0f32; 64];
let output = model.fused_matmul(&input, &weight).unwrap();
for (i, &v) in output.iter().enumerate() {
assert!(
v == 0.0,
"Zero weights must produce zero output, got {v} at index {i}"
);
}
}
#[test]
fn falsify_bf16_004_identity_matmul_preserves_input() {
let dim = 64;
let model = create_test_model(dim, 100);
let weight = create_bf16_identity(dim);
let input: Vec<f32> = (0..dim).map(|i| (i as f32) * 0.1).collect();
let output = model.fused_matmul(&input, &weight).unwrap();
assert_eq!(output.len(), dim);
for (i, (&out, &inp)) in output.iter().zip(input.iter()).enumerate() {
assert!(
(out - inp).abs() < 1e-5,
"Identity matmul must preserve input at index {i}: expected {inp}, got {out}"
);
}
}
#[test]
fn falsify_bf16_005_known_dot_product() {
let model = create_test_model(64, 100);
let one_bf16 = (1.0_f32.to_bits() >> 16) as u16;
let two_bf16 = (2.0_f32.to_bits() >> 16) as u16;
let three_bf16 = (3.0_f32.to_bits() >> 16) as u16;
let four_bf16 = (4.0_f32.to_bits() >> 16) as u16;
let mut data = Vec::new();
data.extend_from_slice(&one_bf16.to_le_bytes());
data.extend_from_slice(&two_bf16.to_le_bytes());
data.extend_from_slice(&three_bf16.to_le_bytes());
data.extend_from_slice(&four_bf16.to_le_bytes());
let weight = OwnedQuantizedTensor {
data,
in_dim: 2,
out_dim: 2,
qtype: GGUF_TYPE_BF16,
};
let input = vec![1.0f32; 2];
let output = model.fused_matmul(&input, &weight).unwrap();
assert_eq!(output.len(), 2);
assert!(
(output[0] - 3.0).abs() < 1e-6,
"Expected 3.0, got {}",
output[0]
);
assert!(
(output[1] - 7.0).abs() < 1e-6,
"Expected 7.0, got {}",
output[1]
);
}
#[test]
fn falsify_bf16_006_f32_equivalence() {
let model = create_test_model(64, 100);
let in_dim = 64;
let out_dim = 16;
let value = 1.5_f32;
let bf16_weight = create_bf16_test_data(in_dim, out_dim, value);
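// Round-trip `value` through BF16 truncation so the F32 reference tensor
// stores exactly the same representable number as the BF16 tensor.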
let bf16_decoded = f32::from_bits(((value.to_bits() >> 16) as u32) << 16);
let f32_data: Vec<u8> = (0..out_dim * in_dim)
.flat_map(|_| bf16_decoded.to_le_bytes())
.collect();
let f32_weight = OwnedQuantizedTensor {
data: f32_data,
in_dim,
out_dim,
qtype: GGUF_TYPE_F32,
};
let input: Vec<f32> = (0..in_dim).map(|i| ((i % 7) as f32) * 0.5).collect();
let bf16_output = model.fused_matmul(&input, &bf16_weight).unwrap();
let f32_output = model.fused_matmul(&input, &f32_weight).unwrap();
assert_eq!(bf16_output.len(), f32_output.len());
for (i, (&b, &f)) in bf16_output.iter().zip(f32_output.iter()).enumerate() {
assert!(
(b - f).abs() < 1e-3,
"BF16 must match F32 at index {i}: bf16={b}, f32={f}"
);
}
}
#[test]
fn falsify_bf16_007_finite_output() {
let model = create_test_model(64, 100);
let weight = create_bf16_test_data(64, 32, 0.5);
let input = vec![1.0f32; 64];
let output = model.fused_matmul(&input, &weight).unwrap();
for (i, &v) in output.iter().enumerate() {
assert!(v.is_finite(), "Output must be finite at index {i}, got {v}");
}
}
#[test]
fn test_fused_matmul_unsupported_type() {
let model = create_test_model(64, 100);
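// qtype 99 is not a recognized GGUF tensor type, so fused_matmul should reject it.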
let weight = OwnedQuantizedTensor {
data: vec![0u8; 64 * 32 * 4],
in_dim: 64,
out_dim: 32,
qtype: 99,
};
let input = vec![1.0f32; 64];
let result = model.fused_matmul(&input, &weight);
assert!(result.is_err());
if let Err(RealizarError::UnsupportedOperation { operation, .. }) = result {
assert_eq!(operation, "owned_fused_matmul");
}
}
#[test]
fn test_fused_matmul_into_q4_0() {
let model = create_test_model(64, 100);
let weight = create_q4_0_test_data(64, 32);
let input = vec![1.0f32; 64];
let mut output = vec![0.0f32; 32];
let result = model.fused_matmul_into(&input, &weight, &mut output);
assert!(result.is_ok());
}
#[test]
fn test_fused_matmul_into_q4k() {
let model = create_test_model(256, 100);
let weight = create_q4k_test_data(256, 64);
let input = vec![1.0f32; 256];
let mut output = vec![0.0f32; 64];
let result = model.fused_matmul_into(&input, &weight, &mut output);
assert!(result.is_ok());
}
#[test]
fn test_fused_matmul_into_multi_token_fallback() {
let model = create_test_model(64, 100);
let weight = create_q4_0_test_data(64, 32);
let input = vec![1.0f32; 64 * 2];
let mut output = vec![0.0f32; 32 * 2];
let result = model.fused_matmul_into(&input, &weight, &mut output);
assert!(result.is_ok());
}
#[test]
fn test_qkv_matmul_fused() {
let model = create_test_model(256, 100);
let qkv_weight = create_q4k_test_data(256, 3 * 256);
let qkv = OwnedQKVWeights::Fused(qkv_weight);
let input = vec![1.0f32; 256];
let result = model.qkv_matmul(&input, &qkv);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 3 * 256);
}
#[test]
fn test_qkv_matmul_separate() {
let model = create_test_model(256, 100);
let q = create_q4k_test_data(256, 256);
let k = create_q4k_test_data(256, 64);
let v = create_q4k_test_data(256, 64);
let qkv = OwnedQKVWeights::Separate { q, k, v };
let input = vec![1.0f32; 256];
let result = model.qkv_matmul(&input, &qkv);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 384);
}
#[test]
fn test_qkv_matmul_separate_multi_position() {
let model = create_test_model(256, 100);
let q = create_q4k_test_data(256, 256);
let k = create_q4k_test_data(256, 64);
let v = create_q4k_test_data(256, 64);
let qkv = OwnedQKVWeights::Separate { q, k, v };
let input = vec![1.0f32; 256 * 2];
let result = model.qkv_matmul(&input, &qkv);
assert!(result.is_ok());
let output = result.unwrap();
assert_eq!(output.len(), 768);
}
#[test]
fn falsify_em_001_embed_output_shape() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
for seq_len in [1, 3, 10, 49] {
let tokens: Vec<u32> = (0..seq_len).collect();
let output = model.embed(&tokens);
assert_eq!(
output.len(),
seq_len as usize * hidden_dim,
"FALSIFIED EM-001: embed({seq_len} tokens) produced {} elements, expected {}",
output.len(),
seq_len as usize * hidden_dim
);
}
}
#[test]
fn falsify_em_001b_embed_empty_input() {
let model = create_test_model(32, 50);
let output = model.embed(&[]);
assert!(
output.is_empty(),
"FALSIFIED EM-001b: empty token list should produce empty output"
);
}
#[test]
fn falsify_em_002_embed_oob_produces_zeros() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
let output = model.embed(&[100]);
assert_eq!(output.len(), hidden_dim);
let all_zero = output.iter().all(|&x| x == 0.0);
assert!(
all_zero,
"FALSIFIED EM-002: OOB token should produce all zeros"
);
}
#[test]
fn falsify_em_002b_embed_boundary_token() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
let output = model.embed(&[49]);
assert_eq!(output.len(), hidden_dim);
assert!(
output.iter().all(|&x| (x - 0.1).abs() < f32::EPSILON),
"FALSIFIED EM-002b: boundary token should produce valid embeddings"
);
}
#[test]
fn falsify_em_003_embed_determinism() {
let model = create_test_model(32, 50);
let tokens = vec![0u32, 10, 25, 49];
let r1 = model.embed(&tokens);
let r2 = model.embed(&tokens);
assert_eq!(r1, r2, "FALSIFIED EM-003: embed() is non-deterministic");
}
#[test]
fn falsify_em_004_embed_finite_output() {
let model = create_test_model(32, 50);
let tokens: Vec<u32> = (0..50).collect();
let output = model.embed(&tokens);
let nan_count = output.iter().filter(|v| v.is_nan()).count();
let inf_count = output.iter().filter(|v| v.is_infinite()).count();
assert_eq!(
nan_count, 0,
"FALSIFIED EM-004: embed output contains {} NaN values",
nan_count
);
assert_eq!(
inf_count, 0,
"FALSIFIED EM-004: embed output contains {} Inf values",
inf_count
);
}
#[test]
fn falsify_em_005_embed_into_value_correctness() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
let batch_output = model.embed(&[5]);
let mut single_output = vec![0.0f32; hidden_dim];
model.embed_into(5, &mut single_output);
assert_eq!(
batch_output, single_output,
"FALSIFIED EM-005: embed([5]) != embed_into(5)"
);
}
#[test]
fn falsify_emb_001_lookup_determinism() {
let model = create_test_model(32, 50);
for t in [0u32, 10, 25, 49] {
let v1 = model.embed(&[t]);
let v2 = model.embed(&[t]);
assert_eq!(v1, v2, "FALSIFIED EMB-001: embed({t}) non-deterministic");
}
}
#[test]
fn falsify_emb_002_shape_preservation() {
for (hidden, vocab) in [(32, 50), (64, 100), (16, 200)] {
let model = create_test_model(hidden, vocab);
let tokens = vec![0u32, 1, 2];
let output = model.embed(&tokens);
assert_eq!(
output.len(),
tokens.len() * hidden,
"FALSIFIED EMB-002: hidden={hidden}, n_tokens={}, output len={} != {}",
tokens.len(),
output.len(),
tokens.len() * hidden
);
}
}
#[test]
fn falsify_emb_004_vocabulary_bounds() {
let hidden = 32;
let vocab = 50;
let model = create_test_model(hidden, vocab);
let valid_output = model.embed(&[vocab as u32 - 1]);
let valid_norm: f32 = valid_output.iter().map(|v| v * v).sum();
assert!(
valid_norm > 0.0,
"FALSIFIED EMB-004: valid token {} produced zero embedding",
vocab - 1
);
let first_output = model.embed(&[0]);
let first_norm: f32 = first_output.iter().map(|v| v * v).sum();
assert!(
first_norm > 0.0,
"FALSIFIED EMB-004: valid token 0 produced zero embedding"
);
}
#[test]
fn falsify_emb_005_embed_non_zero() {
let model = create_test_model(32, 50);
let tokens = vec![0u32, 10, 25, 49];
let output = model.embed(&tokens);
let l2_norm: f32 = output.iter().map(|v| v * v).sum::<f32>().sqrt();
assert!(
l2_norm > 1e-6,
"FALSIFIED EMB-005: embed output is all-zero (L2={l2_norm})"
);
}
#[test]
fn falsify_emb_006_temperature_identity_argmax() {
let logits: Vec<f32> = (0..100).map(|i| (i as f32 * 0.31).sin() * 5.0).collect();
let raw_argmax = OwnedQuantizedModel::argmax(&logits);
let scaled: Vec<f32> = logits.iter().map(|&x| x / 1.0).collect();
let scaled_argmax = OwnedQuantizedModel::argmax(&scaled);
assert_eq!(
raw_argmax, scaled_argmax,
"FALSIFIED EMB-006: T=1.0 scaling changed argmax"
);
}
#[test]
fn falsify_emb_007_temperature_monotonicity() {
let mut logits = vec![0.0f32; 50];
logits[10] = 10.0;
logits[20] = 5.0;
logits[30] = 1.0;
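// Temperature-scaled softmax: dividing by a larger temp flattens the distribution.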
let softmax_at = |logits: &[f32], temp: f32| -> Vec<f32> {
let scaled: Vec<f32> = logits.iter().map(|&x| x / temp).collect();
let max_val = scaled.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let exps: Vec<f32> = scaled.iter().map(|&x| (x - max_val).exp()).collect();
let sum: f32 = exps.iter().sum();
exps.iter().map(|&x| x / sum).collect()
};
let probs_low = softmax_at(&logits, 1.0);
let probs_high = softmax_at(&logits, 10.0);
let entropy = |probs: &[f32]| -> f32 {
probs
.iter()
.filter(|&&p| p > 1e-10)
.map(|&p| -p * p.ln())
.sum()
};
let h_low = entropy(&probs_low);
let h_high = entropy(&probs_high);
assert!(
h_high > h_low,
"FALSIFIED EMB-007: higher T should increase entropy, got h_low={h_low}, h_high={h_high}"
);
}
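/// Property-based variants of the EM-* embedding shape, determinism, and
/// finiteness checks.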
mod em_proptest_falsify {
use super::*;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(50))]
#[test]
fn falsify_em_001_prop_output_shape(
hidden_dim in prop::sample::select(vec![32_usize, 64, 128]),
vocab_size in prop::sample::select(vec![50_usize, 100, 256]),
seq_len in 1_usize..16,
) {
let model = create_test_model(hidden_dim, vocab_size);
let tokens: Vec<u32> = (0..seq_len).map(|i| (i % vocab_size) as u32).collect();
let output = model.embed(&tokens);
prop_assert_eq!(
output.len(), seq_len * hidden_dim,
"FALSIFIED EM-001-prop: len={} != {}*{}={} (v={})",
output.len(), seq_len, hidden_dim, seq_len * hidden_dim, vocab_size
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_em_003_prop_determinism(
hidden_dim in prop::sample::select(vec![32_usize, 64]),
vocab_size in prop::sample::select(vec![50_usize, 100]),
token_ids in proptest::collection::vec(0_u32..49, 1..8),
) {
let model = create_test_model(hidden_dim, vocab_size);
let out1 = model.embed(&token_ids);
let out2 = model.embed(&token_ids);
prop_assert_eq!(
out1, out2,
"FALSIFIED EM-003-prop: two embed calls differ (h={}, v={})",
hidden_dim, vocab_size
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(50))]
#[test]
fn falsify_em_004_prop_finite(
hidden_dim in prop::sample::select(vec![32_usize, 64, 128]),
vocab_size in prop::sample::select(vec![50_usize, 100, 256]),
token_ids in proptest::collection::vec(0_u32..49, 1..8),
) {
let model = create_test_model(hidden_dim, vocab_size);
let output = model.embed(&token_ids);
for (i, &v) in output.iter().enumerate() {
prop_assert!(
v.is_finite(),
"FALSIFIED EM-004-prop: output[{}]={} not finite (h={}, v={})",
i, v, hidden_dim, vocab_size
);
}
}
}
}
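/// Property-based variants of the EMB-* embedding lookup checks.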
mod emb_proptest_falsify {
use super::*;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(50))]
#[test]
fn falsify_emb_001_prop_determinism(
hidden_dim in prop::sample::select(vec![32_usize, 64]),
vocab_size in prop::sample::select(vec![50_usize, 100]),
token_id in 0_u32..49,
) {
let model = create_test_model(hidden_dim, vocab_size);
let v1 = model.embed(&[token_id]);
let v2 = model.embed(&[token_id]);
prop_assert_eq!(
v1, v2,
"FALSIFIED EMB-001-prop: embed({}) non-deterministic (h={}, v={})",
token_id, hidden_dim, vocab_size
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(50))]
#[test]
fn falsify_emb_002_prop_shape(
hidden_dim in prop::sample::select(vec![32_usize, 64, 128]),
vocab_size in prop::sample::select(vec![50_usize, 100, 256]),
seq_len in 1_usize..8,
) {
let model = create_test_model(hidden_dim, vocab_size);
let tokens: Vec<u32> = (0..seq_len).map(|i| (i % vocab_size) as u32).collect();
let output = model.embed(&tokens);
prop_assert_eq!(
output.len(), seq_len * hidden_dim,
"FALSIFIED EMB-002-prop: len={} != {}*{}={} (v={})",
output.len(), seq_len, hidden_dim, seq_len * hidden_dim, vocab_size
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(50))]
#[test]
fn falsify_emb_005_prop_non_zero(
hidden_dim in prop::sample::select(vec![32_usize, 64]),
vocab_size in prop::sample::select(vec![50_usize, 100]),
token_ids in proptest::collection::vec(0_u32..49, 1..4),
) {
let model = create_test_model(hidden_dim, vocab_size);
let output = model.embed(&token_ids);
let l2_norm: f32 = output.iter().map(|v| v * v).sum::<f32>().sqrt();
prop_assert!(
l2_norm > 1e-6,
"FALSIFIED EMB-005-prop: all-zero output (L2={}, h={}, v={})",
l2_norm, hidden_dim, vocab_size
);
}
}
}
#[test]
fn falsify_te_001_lm_head_output_shape() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
let hidden_state = vec![0.1f32; hidden_dim];
let logits = model
.fused_matmul(&hidden_state, model.lm_head_weight())
.expect("fused_matmul should succeed");
assert_eq!(
logits.len(),
vocab_size,
"FALSIFIED TE-001: lm_head output should be vocab_size={vocab_size}, got {}",
logits.len()
);
}
#[test]
fn falsify_te_004_lm_head_finite_output() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
let hidden_state = vec![0.1f32; hidden_dim];
let logits = model
.fused_matmul(&hidden_state, model.lm_head_weight())
.expect("fused_matmul should succeed");
let nan_count = logits.iter().filter(|v| v.is_nan()).count();
let inf_count = logits.iter().filter(|v| v.is_infinite()).count();
assert_eq!(
nan_count, 0,
"FALSIFIED TE-004: lm_head output contains {nan_count} NaN values"
);
assert_eq!(
inf_count, 0,
"FALSIFIED TE-004: lm_head output contains {inf_count} Inf values"
);
}
#[test]
fn falsify_emb_003_tied_embeddings_dimension_match() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
assert_eq!(
model.embed(&[0]).len(),
hidden_dim,
"FALSIFIED EMB-003: embedding dim mismatch"
);
let lm_head = model.lm_head_weight();
assert_eq!(
lm_head.out_dim, vocab_size,
"FALSIFIED EMB-003: lm_head out_dim={} != vocab_size={vocab_size}",
lm_head.out_dim
);
assert_eq!(
lm_head.in_dim, hidden_dim,
"FALSIFIED EMB-003: lm_head in_dim={} != hidden_dim={hidden_dim}",
lm_head.in_dim
);
}
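/// Like `create_test_model`, but configured as "gpt2" with a deterministic
/// position-embedding table of `max_positions` rows for the AP-* tests.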
fn create_test_model_with_pos_embed(
hidden_dim: usize,
vocab_size: usize,
max_positions: usize,
) -> OwnedQuantizedModel {
let mut config = test_config(hidden_dim, vocab_size);
config.architecture = "gpt2".to_string();
config.constraints = crate::gguf::ArchConstraints::from_architecture("gpt2");
config.context_length = max_positions;
let intermediate_dim = config.intermediate_dim;
let qkv_weight = create_q4k_test_data(hidden_dim, 3 * hidden_dim);
let attn_output_weight = create_q4k_test_data(hidden_dim, hidden_dim);
let ffn_up_weight = create_q4k_test_data(hidden_dim, intermediate_dim);
let ffn_down_weight = create_q4k_test_data(intermediate_dim, hidden_dim);
let layer = crate::gguf::OwnedQuantizedLayer {
attn_norm_weight: vec![1.0f32; hidden_dim],
attn_norm_bias: None,
qkv_weight: OwnedQKVWeights::Fused(qkv_weight),
qkv_bias: None,
attn_output_weight,
attn_output_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_gate_weight: None,
ffn_gate_bias: None,
ffn_norm_weight: Some(vec![1.0f32; hidden_dim]),
ffn_norm_bias: None,
attn_q_norm_weight: None,
attn_k_norm_weight: None,
};
let lm_head_weight = create_q4k_test_data(hidden_dim, vocab_size);
let pos_embed: Vec<f32> = (0..max_positions * hidden_dim)
.map(|i| (i as f32 * 0.03).sin() * 0.1)
.collect();
OwnedQuantizedModel {
config,
token_embedding: vec![0.1f32; vocab_size * hidden_dim],
position_embedding: Some(pos_embed),
layers: vec![layer],
encoder_layers: vec![],
encoder_output_norm_weight: None,
encoder_output_norm_bias: None,
output_norm_weight: vec![1.0f32; hidden_dim],
output_norm_bias: None,
lm_head_weight,
lm_head_bias: None,
#[cfg(feature = "cuda")]
cuda_executor: None,
#[cfg(feature = "cuda")]
cuda_kernel_count: std::sync::atomic::AtomicU64::new(0),
#[cfg(feature = "cuda")]
cached_weight_names: std::sync::Mutex::new(std::collections::HashSet::new()),
}
}
#[test]
fn falsify_ap_001_shape_preservation() {
let hidden_dim = 32;
let vocab_size = 50;
let max_pos = 128;
let model_with_pos = create_test_model_with_pos_embed(hidden_dim, vocab_size, max_pos);
let model_without_pos = create_test_model(hidden_dim, vocab_size);
for seq_len in [1, 3, 10] {
let tokens: Vec<u32> = (0..seq_len).collect();
let embed_with = model_with_pos.embed(&tokens);
let embed_without = model_without_pos.embed(&tokens);
assert_eq!(
embed_with.len(),
embed_without.len(),
"FALSIFIED AP-001: position add changed embed shape for seq_len={seq_len}"
);
assert_eq!(
embed_with.len(),
seq_len as usize * hidden_dim,
"FALSIFIED AP-001: embed shape wrong for seq_len={seq_len}"
);
}
}
#[test]
fn falsify_ap_002_additive_identity() {
let hidden_dim = 32;
let vocab_size = 50;
let max_pos = 128;
let mut config = test_config(hidden_dim, vocab_size);
config.architecture = "gpt2".to_string();
config.constraints = crate::gguf::ArchConstraints::from_architecture("gpt2");
config.context_length = max_pos;
let intermediate_dim = config.intermediate_dim;
let qkv_weight = create_q4k_test_data(hidden_dim, 3 * hidden_dim);
let attn_output_weight = create_q4k_test_data(hidden_dim, hidden_dim);
let ffn_up_weight = create_q4k_test_data(hidden_dim, intermediate_dim);
let ffn_down_weight = create_q4k_test_data(intermediate_dim, hidden_dim);
let layer = crate::gguf::OwnedQuantizedLayer {
attn_norm_weight: vec![1.0f32; hidden_dim],
attn_norm_bias: None,
qkv_weight: OwnedQKVWeights::Fused(qkv_weight),
qkv_bias: None,
attn_output_weight,
attn_output_bias: None,
ffn_up_weight,
ffn_up_bias: None,
ffn_down_weight,
ffn_down_bias: None,
ffn_gate_weight: None,
ffn_gate_bias: None,
ffn_norm_weight: Some(vec![1.0f32; hidden_dim]),
ffn_norm_bias: None,
attn_q_norm_weight: None,
attn_k_norm_weight: None,
};
let lm_head_weight = create_q4k_test_data(hidden_dim, vocab_size);
let zero_pos = vec![0.0f32; max_pos * hidden_dim];
let model_zero = OwnedQuantizedModel {
config: config.clone(),
token_embedding: vec![0.1f32; vocab_size * hidden_dim],
position_embedding: Some(zero_pos),
layers: vec![layer],
encoder_layers: vec![],
encoder_output_norm_weight: None,
encoder_output_norm_bias: None,
output_norm_weight: vec![1.0f32; hidden_dim],
output_norm_bias: None,
lm_head_weight,
lm_head_bias: None,
#[cfg(feature = "cuda")]
cuda_executor: None,
#[cfg(feature = "cuda")]
cuda_kernel_count: std::sync::atomic::AtomicU64::new(0),
#[cfg(feature = "cuda")]
cached_weight_names: std::sync::Mutex::new(std::collections::HashSet::new()),
};
let model_none = create_test_model(hidden_dim, vocab_size);
let tokens = vec![5u32, 10, 20];
let embed_zero = model_zero.embed(&tokens);
let embed_none = model_none.embed(&tokens);
assert_eq!(
embed_zero, embed_none,
"FALSIFIED AP-002: zero pos_embed should not change embed() output"
);
}
#[test]
fn falsify_ap_004_finite_output() {
let hidden_dim = 32;
let vocab_size = 50;
let max_pos = 128;
let model = create_test_model_with_pos_embed(hidden_dim, vocab_size, max_pos);
let tokens: Vec<u32> = (0..10).collect();
let embed = model.embed(&tokens);
let nan_count = embed.iter().filter(|v| v.is_nan()).count();
let inf_count = embed.iter().filter(|v| v.is_infinite()).count();
assert_eq!(nan_count, 0, "FALSIFIED AP-004: embed has {nan_count} NaN");
assert_eq!(inf_count, 0, "FALSIFIED AP-004: embed has {inf_count} Inf");
}
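/// Property-based variants of the AP-* absolute-position embedding checks.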
mod ap_proptest_falsify {
use super::*;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_ap_001_prop_shape(
hidden_dim in prop::sample::select(vec![32_usize, 64]),
vocab_size in prop::sample::select(vec![50_usize, 100]),
seq_len in 1_usize..16,
) {
let max_pos = 128;
let model = create_test_model_with_pos_embed(hidden_dim, vocab_size, max_pos);
let tokens: Vec<u32> = (0..seq_len).map(|i| (i % vocab_size) as u32).collect();
let embed = model.embed(&tokens);
prop_assert_eq!(
embed.len(), seq_len * hidden_dim,
"FALSIFIED AP-001-prop: embed len={} != {}*{}={} (v={})",
embed.len(), seq_len, hidden_dim, seq_len * hidden_dim, vocab_size
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_ap_004_prop_finite(
hidden_dim in prop::sample::select(vec![32_usize, 64]),
vocab_size in prop::sample::select(vec![50_usize, 100]),
seq_len in 1_usize..16,
) {
let max_pos = 128;
let model = create_test_model_with_pos_embed(hidden_dim, vocab_size, max_pos);
let tokens: Vec<u32> = (0..seq_len).map(|i| (i % vocab_size) as u32).collect();
let embed = model.embed(&tokens);
for (i, &v) in embed.iter().enumerate() {
prop_assert!(
v.is_finite(),
"FALSIFIED AP-004-prop: embed[{i}]={v} not finite (h={hidden_dim}, v={vocab_size}, s={seq_len})"
);
}
}
}
}
#[test]
fn falsify_te_002_tied_equivalence() {
let hidden_dim = 32;
let vocab_size = 50;
let model_a = create_test_model(hidden_dim, vocab_size);
let mut model_b = create_test_model(hidden_dim, vocab_size);
let tied_weight_data = model_b.token_embedding.clone();
let tied_bytes: Vec<u8> = tied_weight_data
.iter()
.flat_map(|f| f.to_le_bytes())
.collect();
let tied_lm_head = OwnedQuantizedTensor {
data: tied_bytes,
qtype: GGUF_TYPE_F32,
in_dim: hidden_dim,
out_dim: vocab_size,
};
model_b.lm_head_weight = tied_lm_head;
let hidden_state = vec![0.5f32; hidden_dim];
let logits_tied = model_b
.fused_matmul(&hidden_state, &model_b.lm_head_weight)
.expect("tied fused_matmul should succeed");
let mut expected = vec![0.0f32; vocab_size];
for j in 0..vocab_size {
for i in 0..hidden_dim {
expected[j] += hidden_state[i] * tied_weight_data[j * hidden_dim + i];
}
}
assert_eq!(
logits_tied.len(),
vocab_size,
"FALSIFIED TE-002: tied lm_head output len {} != vocab_size {}",
logits_tied.len(),
vocab_size
);
for (j, (&actual, &exp)) in logits_tied.iter().zip(expected.iter()).enumerate() {
assert!(
(actual - exp).abs() < 1e-3,
"FALSIFIED TE-002: logits[{j}] = {actual} != expected {exp}"
);
}
}
#[test]
fn falsify_te_003_tied_no_extra_weight_data() {
let hidden_dim = 32;
let vocab_size = 50;
let token_embedding = vec![0.1f32; vocab_size * hidden_dim];
let tied_data: Vec<u8> = token_embedding
.iter()
.flat_map(|f| f.to_le_bytes())
.collect();
let tied_lm_head = OwnedQuantizedTensor {
data: tied_data,
qtype: GGUF_TYPE_F32,
in_dim: hidden_dim,
out_dim: vocab_size,
};
let embed_bytes = token_embedding.len() * std::mem::size_of::<f32>();
assert_eq!(
tied_lm_head.data.len(),
embed_bytes,
"FALSIFIED TE-003: tied lm_head data ({}) != embedding bytes ({embed_bytes})",
tied_lm_head.data.len()
);
let expected_f32_bytes = vocab_size * hidden_dim * 4;
assert_eq!(
tied_lm_head.data.len(),
expected_f32_bytes,
"FALSIFIED TE-003: tied F32 lm_head data ({}) != expected ({expected_f32_bytes})",
tied_lm_head.data.len()
);
}
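/// Property-based variants of the TE-* tied-embedding and LM-head checks.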
mod te_proptest_falsify {
use super::*;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_te_001_prop_output_shape(
hidden_dim in prop::sample::select(vec![32_usize, 64, 128]),
vocab_size in prop::sample::select(vec![50_usize, 100, 256, 512]),
) {
let model = create_test_model(hidden_dim, vocab_size);
let hidden_state = vec![0.1f32; hidden_dim];
let logits = model
.fused_matmul(&hidden_state, model.lm_head_weight())
.expect("fused_matmul should succeed");
prop_assert_eq!(
logits.len(), vocab_size,
"FALSIFIED TE-001-prop: logits len={} != vocab_size={} (h={})",
logits.len(), vocab_size, hidden_dim
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(20))]
#[test]
fn falsify_te_002_prop_tied_equivalence(
hidden_dim in prop::sample::select(vec![32_usize, 64]),
vocab_size in prop::sample::select(vec![50_usize, 100]),
scale in 0.01_f32..2.0,
) {
let mut model = create_test_model(hidden_dim, vocab_size);
let tied_data = model.token_embedding.clone();
let tied_bytes: Vec<u8> = tied_data
.iter()
.flat_map(|f| f.to_le_bytes())
.collect();
let tied_lm_head = OwnedQuantizedTensor {
data: tied_bytes,
qtype: GGUF_TYPE_F32,
in_dim: hidden_dim,
out_dim: vocab_size,
};
model.lm_head_weight = tied_lm_head;
let hidden_state: Vec<f32> = (0..hidden_dim)
.map(|i| (i as f32 * 0.07 * scale).sin())
.collect();
let logits = model
.fused_matmul(&hidden_state, &model.lm_head_weight)
.expect("tied fused_matmul should succeed");
let mut expected = vec![0.0f32; vocab_size];
for j in 0..vocab_size {
for i in 0..hidden_dim {
expected[j] += hidden_state[i] * tied_data[j * hidden_dim + i];
}
}
for (j, (&actual, &exp)) in logits.iter().zip(expected.iter()).enumerate() {
prop_assert!(
(actual - exp).abs() < 1e-3,
"FALSIFIED TE-002-prop: logits[{j}]={actual} != expected {exp} (h={hidden_dim}, v={vocab_size})"
);
}
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(50))]
#[test]
fn falsify_te_004_prop_finite(
hidden_dim in prop::sample::select(vec![32_usize, 64, 128]),
vocab_size in prop::sample::select(vec![50_usize, 100, 256]),
scale in 0.001_f32..10.0,
) {
let model = create_test_model(hidden_dim, vocab_size);
let hidden_state: Vec<f32> = (0..hidden_dim)
.map(|i| (i as f32 * 0.03 * scale).cos())
.collect();
let logits = model
.fused_matmul(&hidden_state, model.lm_head_weight())
.expect("fused_matmul should succeed");
for (i, &v) in logits.iter().enumerate() {
prop_assert!(
v.is_finite(),
"FALSIFIED TE-004-prop: logits[{i}]={v} not finite (h={hidden_dim}, v={vocab_size})"
);
}
}
}
}
#[test]
fn falsify_ap_003_max_position_bounds() {
let hidden_dim = 32;
let vocab_size = 50;
let max_pos = 16;
let model = create_test_model_with_pos_embed(hidden_dim, vocab_size, max_pos);
let tokens_within: Vec<u32> = (0..max_pos as u32).map(|i| i % vocab_size as u32).collect();
let embed_within = model.embed(&tokens_within);
assert_eq!(
embed_within.len(),
max_pos * hidden_dim,
"FALSIFIED AP-003: within-bounds embed shape wrong"
);
assert!(
embed_within.iter().all(|v| v.is_finite()),
"FALSIFIED AP-003: within-bounds embed contains non-finite values"
);
let model_no_pos = create_test_model(hidden_dim, vocab_size);
let embed_no_pos = model_no_pos.embed(&tokens_within);
let diff_count = embed_within
.iter()
.zip(embed_no_pos.iter())
.filter(|(&a, &b)| (a - b).abs() > 1e-10)
.count();
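// diff_count is informational only; this test asserts in-bounds shape and
// finiteness above rather than whether position embeddings alter embed().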
let _ = diff_count;
}
#[test]
fn falsify_pipe_001_embed_lm_head_softmax_pipeline() {
let hidden_dim = 32;
let vocab_size = 50;
let model = create_test_model(hidden_dim, vocab_size);
let tokens = vec![0u32, 3, 10, 25, 49];
let embedded = model.embed(&tokens);
assert_eq!(
embedded.len(),
tokens.len() * hidden_dim,
"FALSIFIED PIPE-001/EM-001: embed len={} != {}*{hidden_dim}",
embedded.len(),
tokens.len()
);
for (i, &v) in embedded.iter().enumerate() {
assert!(
v.is_finite(),
"FALSIFIED PIPE-001/EM-004: embed[{i}] = {v} not finite"
);
}
let last_hidden = &embedded[(tokens.len() - 1) * hidden_dim..];
let logits = model.fused_rmsnorm_lm_head(last_hidden).unwrap();
assert_eq!(
logits.len(),
vocab_size,
"FALSIFIED PIPE-001/TE-001: logits len={} != vocab_size={vocab_size}",
logits.len()
);
for (i, &l) in logits.iter().enumerate() {
assert!(
l.is_finite(),
"FALSIFIED PIPE-001/TE-004: logits[{i}] = {l} not finite"
);
}
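// Numerically stable softmax: subtract the max logit before exponentiating.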
let max_val = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let exps: Vec<f32> = logits.iter().map(|&x| (x - max_val).exp()).collect();
let sum: f32 = exps.iter().sum();
let probs: Vec<f32> = exps.iter().map(|&e| e / sum).collect();
let prob_sum: f32 = probs.iter().sum();
assert!(
(prob_sum - 1.0).abs() < 1e-4,
"FALSIFIED PIPE-001/SM-001: prob sum={prob_sum}"
);
for (i, &p) in probs.iter().enumerate() {
assert!(
p >= 0.0,
"FALSIFIED PIPE-001/SM-002: prob[{i}]={p} negative"
);
}
let logit_argmax = logits
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
.unwrap()
.0;
let prob_argmax = probs
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
.unwrap()
.0;
assert_eq!(
logit_argmax, prob_argmax,
"FALSIFIED PIPE-001/SM-003: argmax changed {} → {}",
logit_argmax, prob_argmax
);
}
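// Additional matmul, QKV, and norm coverage lives in the included file below.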
include!("matmul_qkv_norm_tests.rs");