use crate::models::qwen2::Embedding;
#[test]
fn falsify_em_001_output_shape_correctness() {
    // EM-001: forward() must yield shape [1, seq_len, hidden_size] for any seq_len.
    let vocab_size = 100;
    let hidden_size = 64;
    let emb = Embedding::new(vocab_size, hidden_size);
    let seq_lens = [1u32, 2, 5, 10, 50];
    for &seq_len in seq_lens.iter() {
        // Token ids 0..seq_len are all in-vocab since every seq_len <= vocab_size.
        let input_ids: Vec<u32> = (0..seq_len).collect();
        let output = emb.forward(&input_ids);
        let expected = [1, seq_len as usize, hidden_size];
        assert_eq!(
            output.shape(),
            &expected,
            "FALSIFIED EM-001: seq_len={seq_len}, got shape {:?}, expected [1, {seq_len}, {hidden_size}]",
            output.shape()
        );
    }
}
#[test]
fn falsify_em_001_empty_input() {
    // EM-001 edge case: zero tokens must still produce a well-formed [1, 0, 64] tensor.
    let emb = Embedding::new(100, 64);
    let no_ids: [u32; 0] = [];
    let output = emb.forward(&no_ids);
    let shape = output.shape();
    assert_eq!(
        shape,
        &[1, 0, 64],
        "FALSIFIED EM-001: empty input should produce [1, 0, 64], got {:?}",
        shape
    );
}
#[test]
fn falsify_em_002_valid_ids_no_panic() {
    // EM-002: ids at both ends of the vocab range must embed without panicking
    // and must produce only finite values.
    let vocab_size = 100;
    let emb = Embedding::new(vocab_size, 32);
    let ids = [0u32, 1, 99]; // 99 == vocab_size - 1, the last valid id
    let output = emb.forward(&ids);
    assert_eq!(output.shape(), &[1, 3, 32]);
    let any_non_finite = output.data().iter().any(|v| !v.is_finite());
    assert!(
        !any_non_finite,
        "FALSIFIED EM-002: valid IDs produced non-finite values"
    );
}
#[test]
fn falsify_em_002_oob_produces_zeros_not_panic() {
    // EM-002 + N-09: an out-of-bounds id (== vocab_size) must not panic;
    // the documented escape hatch is an all-zero embedding row.
    let vocab_size = 100;
    let emb = Embedding::new(vocab_size, 32);
    let output = emb.forward(&[100]);
    let is_zero = output.data().iter().all(|v| *v == 0.0);
    assert!(
        is_zero,
        "FALSIFIED EM-002: OOB token_id=100 (vocab={vocab_size}) should produce \
         zeros (N-09 documented escape), but got non-zero values"
    );
}
#[test]
fn falsify_em_002_large_oob_no_panic() {
    // EM-002: even extreme ids (u32::MAX and friends) must not panic and must
    // keep the output shape intact.
    let emb = Embedding::new(100, 32);
    let huge_ids = [u32::MAX, u32::MAX - 1, 1_000_000];
    let output = emb.forward(&huge_ids);
    assert_eq!(output.shape(), &[1, 3, 32]);
}
#[test]
fn falsify_em_003_deterministic_output() {
    // EM-003: forward() is a pure lookup — repeated calls with the same ids
    // must produce bit-identical data.
    let emb = Embedding::new(200, 48);
    let ids = [0u32, 5, 10, 50, 100, 199];
    let first = emb.forward(&ids);
    let second = emb.forward(&ids);
    assert_eq!(
        first.data(),
        second.data(),
        "FALSIFIED EM-003: two identical calls produced different output (non-determinism)"
    );
}
#[test]
fn falsify_em_003_order_matters() {
    // EM-003: an embedding lookup is position-independent — token 1 must map to
    // the same vector whether it appears first or last in the sequence.
    let emb = Embedding::new(100, 32);
    let out_a = emb.forward(&[1, 2, 3]);
    let out_b = emb.forward(&[3, 2, 1]);
    assert_eq!(out_a.shape(), out_b.shape());
    let d = 32;
    // Token 1 sits at position 0 in out_a and at position 2 in out_b.
    let tok1_front = &out_a.data()[..d];
    let tok1_back = &out_b.data()[2 * d..3 * d];
    assert_eq!(
        tok1_front, tok1_back,
        "FALSIFIED EM-003: token 1 embedding differs based on position in sequence"
    );
}
#[test]
fn falsify_em_004_finite_output() {
    // EM-004: every output element must be finite (no NaN/Inf from weight init
    // or the lookup itself).
    let emb = Embedding::new(500, 128);
    let ids: Vec<u32> = (0..500).step_by(7).collect();
    let output = emb.forward(&ids);
    let data = output.data();
    for i in 0..data.len() {
        let v = data[i];
        assert!(
            v.is_finite(),
            "FALSIFIED EM-004: output[{i}] = {v} is not finite (NaN or Inf)"
        );
    }
}
#[test]
fn falsify_emb_001_lookup_determinism() {
    // EMB-001: single-token lookups must be deterministic across calls.
    let emb = Embedding::new(1000, 64);
    let probes = [0u32, 1, 42, 500, 999];
    for &t in probes.iter() {
        let v1 = emb.forward(&[t]);
        let v2 = emb.forward(&[t]);
        assert_eq!(
            v1.data(),
            v2.data(),
            "FALSIFIED EMB-001: embed({t}) != embed({t}) — non-deterministic lookup"
        );
    }
}
#[test]
fn falsify_emb_002_shape_preservation() {
    // EMB-002: the [1, n_tokens, d_model] shape must hold across several
    // (vocab, d_model) configurations.
    let configs = [(100, 64), (256, 128), (1000, 256), (50, 32)];
    for &(v, d) in configs.iter() {
        let emb = Embedding::new(v, d);
        let output = emb.forward(&[0, 1, 2]);
        assert_eq!(
            output.shape(),
            &[1, 3, d],
            "FALSIFIED EMB-002: vocab={v}, d_model={d}, got shape {:?}",
            output.shape()
        );
    }
}
#[test]
fn falsify_emb_003_weight_shape_suitable_for_tied() {
    // EMB-003: the raw weight matrix must be [vocab, d_model] so it can double
    // as a tied unembedding head (logits = h @ W^T).
    let vocab_size = 200;
    let d_model = 64;
    let emb = Embedding::new(vocab_size, d_model);
    let w = emb.weight();
    let shape = w.shape();
    assert_eq!(
        shape,
        &[vocab_size, d_model],
        "FALSIFIED EMB-003: weight shape {:?} != [{vocab_size}, {d_model}] — \
         cannot be used for tied unembedding (logits = h @ W^T)",
        shape
    );
}
#[test]
fn falsify_emb_003_tied_logit_computation() {
    // EMB-003: hand-computing logits as embed(t) @ W^T must yield a finite
    // value for every vocabulary entry, for tokens across the vocab range.
    let vocab = 50;
    let d = 16;
    let emb = Embedding::new(vocab, d);
    let w_data = emb.weight().data().to_vec();
    for t in [0u32, 1, 25, 49] {
        let embed_vec = emb.forward(&[t]);
        let ev = embed_vec.data();
        let e = &ev[..d];
        // Dot product of the token's embedding against every weight row.
        let mut logits = Vec::with_capacity(vocab);
        for v in 0..vocab {
            let row = &w_data[v * d..(v + 1) * d];
            let dot: f32 = e.iter().zip(row.iter()).map(|(a, b)| a * b).sum();
            logits.push(dot);
        }
        for (v, &l) in logits.iter().enumerate() {
            assert!(
                l.is_finite(),
                "FALSIFIED EMB-003: logit[{v}] = {l} for embed({t}) @ W^T is not finite"
            );
        }
        // The token's own logit (the diagonal of the tied head) in particular.
        let self_logit = logits[t as usize];
        assert!(
            self_logit.is_finite(),
            "FALSIFIED EMB-003: self-logit for token {t} is not finite"
        );
    }
}
#[test]
fn falsify_emb_004_vocab_bounds_valid() {
    // EMB-004: every in-vocab token must map to a non-degenerate (non-zero) vector.
    let vocab = 50;
    let emb = Embedding::new(vocab, 16);
    for t in 0..vocab as u32 {
        let output = emb.forward(&[t]);
        // Sum of squares is strictly positive iff any component is non-zero.
        let sum_sq: f32 = output.data().iter().map(|v| v * v).sum();
        assert!(
            sum_sq > 0.0,
            "FALSIFIED EMB-004: valid token {t} produced zero embedding (degenerate)"
        );
    }
}
#[test]
fn falsify_emb_004_vocab_bounds_oob() {
    // EMB-004 / N-09: any id >= vocab must yield the all-zero escape vector.
    let vocab = 50;
    let emb = Embedding::new(vocab, 16);
    let oob_ids = [vocab as u32, vocab as u32 + 1, u32::MAX];
    for &t in oob_ids.iter() {
        let output = emb.forward(&[t]);
        let energy: f32 = output.data().iter().map(|v| v * v).sum();
        assert!(
            energy == 0.0,
            "FALSIFIED EMB-004: OOB token {t} (vocab={vocab}) produced non-zero output"
        );
    }
}
#[test]
fn falsify_emb_005_no_zero_embeddings() {
    // EMB-005: no weight row may be all-zero — a dead row would make its token
    // indistinguishable from the OOB escape vector.
    let vocab = 200;
    let d = 32;
    let emb = Embedding::new(vocab, d);
    let w = emb.weight().data();
    for t in 0..vocab {
        let row = &w[t * d..(t + 1) * d];
        let sum_sq = row.iter().fold(0.0f32, |acc, v| acc + v * v);
        assert!(
            sum_sq > 0.0,
            "FALSIFIED EMB-005: embedding row {t} is all-zero (degenerate). \
             Dead row in weight matrix."
        );
    }
}
#[test]
fn falsify_emb_005_all_finite_norms() {
    // EMB-005: each weight row's L2 norm must be finite and strictly positive.
    let vocab = 200;
    let d = 32;
    let emb = Embedding::new(vocab, d);
    let w = emb.weight().data();
    for t in 0..vocab {
        let row = &w[t * d..(t + 1) * d];
        let sum_sq = row.iter().map(|v| v * v).sum::<f32>();
        let norm = sum_sq.sqrt();
        assert!(
            norm.is_finite() && norm > 0.0,
            "FALSIFIED EMB-005: embedding row {t} has norm={norm} (expected finite & positive)"
        );
    }
}
#[test]
fn falsify_emb_006_temperature_identity() {
    // EMB-006: temperature T = 1.0 must be a bit-exact no-op on logits
    // (IEEE 754 division by 1.0 is exact, including for subnormals).
    let logits = vec![1.0f32, -2.5, 0.0, 3.14, -0.001, f32::MIN_POSITIVE];
    let scaled: Vec<f32> = logits.iter().copied().map(|l| l / 1.0).collect();
    assert_eq!(
        logits, scaled,
        "FALSIFIED EMB-006: logits / 1.0 != logits (floating-point division not exact)"
    );
}
/// Numerically stable softmax: subtracts the max logit before exponentiating
/// so large inputs cannot overflow `exp`. Returns a vector summing to ~1.0.
fn softmax(logits: &[f32]) -> Vec<f32> {
    let mut max = f32::NEG_INFINITY;
    for &l in logits {
        max = max.max(l);
    }
    let exps: Vec<f32> = logits.iter().map(|&l| (l - max).exp()).collect();
    let total: f32 = exps.iter().sum();
    exps.into_iter().map(|e| e / total).collect()
}
/// Shannon entropy (in nats) of a probability vector; zero-probability entries
/// contribute nothing (lim p→0 of p·ln p = 0).
fn entropy(probs: &[f32]) -> f32 {
    let mut acc = 0.0f32;
    for &p in probs {
        if p > 0.0 {
            acc += p * p.ln();
        }
    }
    -acc
}
#[test]
fn falsify_emb_007_temperature_monotonicity() {
    // EMB-007: raising the temperature flattens the distribution, so entropy
    // must be non-decreasing as T grows (up to a small float tolerance).
    let logits = vec![2.0f32, 1.0, 0.5, -1.0, 0.0];
    let mut prev_entropy = f32::NEG_INFINITY;
    for t in [0.1, 0.5, 1.0, 2.0, 5.0, 10.0] {
        let scaled: Vec<f32> = logits.iter().map(|&l| l / t).collect();
        let h = entropy(&softmax(&scaled));
        assert!(
            h >= prev_entropy - 1e-6,
            "FALSIFIED EMB-007: T={t} entropy={h} < prev_entropy={prev_entropy}. \
             Temperature scaling monotonicity violated."
        );
        prev_entropy = h;
    }
}
#[test]
fn falsify_te_cross_tied_lm_head_shape() {
    // TE-CROSS: the weight buffer must hold exactly vocab * d_model elements,
    // laid out as [vocab, d_model], so a tied LM head can reuse it directly.
    let vocab = 50;
    let d_model = 16;
    let emb = Embedding::new(vocab, d_model);
    let w = emb.weight();
    assert_eq!(w.shape(), &[vocab, d_model]);
    let len = w.data().len();
    assert_eq!(
        len,
        vocab * d_model,
        "FALSIFIED TE-CROSS: weight length {} != vocab({vocab}) * d_model({d_model})",
        len
    );
}
#[test]
fn falsify_te_cross_tied_logit_shape() {
    // TE-CROSS: a hand-rolled hidden @ W^T over a small sequence must produce
    // exactly seq_len * vocab logits, all finite.
    let vocab = 10;
    let d = 4;
    let emb = Embedding::new(vocab, d);
    let w = emb.weight().data().to_vec();
    let seq_len = 3;
    // Deterministic fake hidden states: 0.0, 0.1, 0.2, ...
    let hidden: Vec<f32> = (0..seq_len * d).map(|i| (i as f32) * 0.1).collect();
    // logits[s * vocab + v] = hidden_row(s) . weight_row(v)
    let mut logits = Vec::with_capacity(seq_len * vocab);
    for s in 0..seq_len {
        let h_row = &hidden[s * d..(s + 1) * d];
        for v in 0..vocab {
            let w_row = &w[v * d..(v + 1) * d];
            let dot: f32 = h_row.iter().zip(w_row.iter()).map(|(a, b)| a * b).sum();
            logits.push(dot);
        }
    }
    assert_eq!(
        logits.len(),
        seq_len * vocab,
        "FALSIFIED TE-CROSS: logits length {} != seq_len({seq_len}) * vocab({vocab})",
        logits.len()
    );
    for (i, &l) in logits.iter().enumerate() {
        assert!(
            l.is_finite(),
            "FALSIFIED TE-CROSS: logits[{i}] = {l} is not finite"
        );
    }
}
#[test]
fn falsify_em_cross_enforce_embedding_contract_shape() {
    // EM-CROSS: the layout contract must accept buffers whose length equals
    // vocab * d_model for representative configurations. The enforcement
    // function is expected to panic on violation, so reaching the end of this
    // test body is the pass condition.
    crate::format::layout_contract::enforce_embedding_contract(100 * 64, 100, 64);
    crate::format::layout_contract::enforce_embedding_contract(1000 * 128, 1000, 128);
}
#[test]
#[should_panic(expected = "CONTRACT VIOLATION")]
fn falsify_em_cross_enforce_embedding_contract_mismatch() {
    // EM-CROSS: a buffer length that cannot equal vocab * d_model
    // (999 != 100 * 64) must trigger the contract-violation panic.
    crate::format::layout_contract::enforce_embedding_contract(999, 100, 64);
}
mod proptest_falsify {
    //! Property-based falsification tests for the embedding contract:
    //! output shape, OOB-id handling, determinism, finiteness, and
    //! temperature-scaling invariants.
    use super::*;
    use proptest::prelude::*;
    proptest! {
        // EM-001: output shape is [1, seq_len, d_model] for arbitrary seq_len
        // and a sampled d_model.
        #[test]
        fn falsify_em_001_prop_output_shape(
            seq_len in 1_u32..128,
            d_model_idx in 0_usize..3,
        ) {
            let d_models = [64_usize, 128, 256];
            let d_model = d_models[d_model_idx];
            let vocab_size = 500;
            let emb = Embedding::new(vocab_size, d_model);
            let input_ids: Vec<u32> = (0..seq_len).map(|i| i % vocab_size as u32).collect();
            let output = emb.forward(&input_ids);
            prop_assert_eq!(
                output.shape(),
                &[1, seq_len as usize, d_model],
                "FALSIFIED EM-001-prop: seq_len={}, d_model={}, shape={:?}",
                seq_len, d_model, output.shape()
            );
        }
    }
    proptest! {
        // EM-002: the last valid id must embed to something non-zero, while any
        // id >= vocab_size must embed to the all-zero escape vector.
        #[test]
        fn falsify_em_002_prop_boundary_oob(
            vocab_size in 10_usize..200,
            offset in 0_u32..10,
        ) {
            let emb = Embedding::new(vocab_size, 32);
            let valid_id = (vocab_size as u32).saturating_sub(1);
            let oob_id = vocab_size as u32 + offset;
            let valid_output = emb.forward(&[valid_id]);
            let valid_norm: f32 = valid_output.data().iter().map(|v| v * v).sum();
            prop_assert!(valid_norm > 0.0, "FALSIFIED EM-002-prop: valid token {} produced zero", valid_id);
            let oob_output = emb.forward(&[oob_id]);
            let oob_norm: f32 = oob_output.data().iter().map(|v| v * v).sum();
            prop_assert!(oob_norm == 0.0, "FALSIFIED EM-002-prop: OOB token {} produced non-zero", oob_id);
        }
    }
    proptest! {
        // EM-004: all output elements stay finite across vocab/d_model/seq sizes.
        #[test]
        fn falsify_em_004_prop_finite_output(
            vocab_size in 10_usize..300,
            d_model in prop::sample::select(vec![16_usize, 32, 64, 128]),
            num_tokens in 1_usize..50,
        ) {
            let emb = Embedding::new(vocab_size, d_model);
            let ids: Vec<u32> = (0..num_tokens).map(|i| (i % vocab_size) as u32).collect();
            let output = emb.forward(&ids);
            for (i, v) in output.data().iter().enumerate() {
                prop_assert!(
                    v.is_finite(),
                    "FALSIFIED EM-004-prop: output[{}] = {} is not finite (vocab={}, d={})",
                    i, v, vocab_size, d_model
                );
            }
        }
    }
    proptest! {
        // EMB-001: single-token lookups are deterministic for every valid id.
        #[test]
        fn falsify_emb_001_prop_lookup_determinism(
            // FIX: proptest ranges are half-open, so the previous `0_u32..999`
            // never sampled token 999 — the last valid id of a 1000-entry vocab.
            t in 0_u32..1000,
        ) {
            let emb = Embedding::new(1000, 64);
            let v1 = emb.forward(&[t]);
            let v2 = emb.forward(&[t]);
            prop_assert_eq!(
                v1.data(), v2.data(),
                "FALSIFIED EMB-001-prop: embed({}) non-deterministic", t
            );
        }
    }
    proptest! {
        // EMB-002: output shape tracks (num_tokens, d_model) exactly.
        #[test]
        fn falsify_emb_002_prop_shape(
            d_model in prop::sample::select(vec![16_usize, 32, 64, 128, 256]),
            num_tokens in 1_usize..20,
        ) {
            let vocab = 100;
            let emb = Embedding::new(vocab, d_model);
            let ids: Vec<u32> = (0..num_tokens).map(|i| (i % vocab) as u32).collect();
            let output = emb.forward(&ids);
            prop_assert_eq!(
                output.shape(),
                &[1, num_tokens, d_model],
                "FALSIFIED EMB-002-prop: d_model={}, n={}, shape={:?}",
                d_model, num_tokens, output.shape()
            );
        }
    }
    proptest! {
        #![proptest_config(ProptestConfig::with_cases(500))]
        // EMB-006: division by T = 1.0 is a bit-exact identity for arbitrary logits.
        #[test]
        fn falsify_emb_006_prop_temperature_identity(
            logits in proptest::collection::vec(-50.0_f32..50.0, 2..32),
        ) {
            let scaled: Vec<f32> = logits.iter().map(|&l| l / 1.0).collect();
            prop_assert_eq!(
                logits, scaled,
                "FALSIFIED EMB-006-prop: logits / 1.0 != logits"
            );
        }
    }
    proptest! {
        #![proptest_config(ProptestConfig::with_cases(200))]
        // EMB-007: entropy of softmax(logits / T) is non-decreasing in T.
        #[test]
        fn falsify_emb_007_prop_temperature_monotonicity(
            logits in proptest::collection::vec(-10.0_f32..10.0, 3..16),
        ) {
            let temperatures = [0.5_f32, 1.0, 2.0, 5.0];
            let mut prev_h = f32::NEG_INFINITY;
            for &t in &temperatures {
                let scaled: Vec<f32> = logits.iter().map(|&l| l / t).collect();
                let probs = super::softmax(&scaled);
                let h = super::entropy(&probs);
                prop_assert!(
                    h >= prev_h - 1e-4,
                    "FALSIFIED EMB-007-prop: T={} entropy={} < prev={}",
                    t, h, prev_h
                );
                prev_h = h;
            }
        }
    }
}