// CONTRACT: tensor-shape-flow-v1.yaml
// HASH: sha256:a3b4c5d6e7f89013
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`
use proptest::prelude::*;
/// Strategy: generate valid transformer configs for shape flow verification.
fn shape_config_strategy() -> impl Strategy<Value = (usize, usize, usize, usize, usize, usize)> {
    // num_kv_heads in [1, 8]
    (1usize..=8).prop_flat_map(|n_kv| {
        // gqa_group in [1, 8] => num_heads = n_kv * gqa_group
        (1usize..=8).prop_flat_map(move |gqa_group| {
            let n_h = n_kv * gqa_group;
            // head_dim even, in [2, 64]
            (1usize..=32).prop_map(move |half_d_k| {
                let d_k = half_d_k * 2;
                let hidden = n_h * d_k;
                let d_ff = hidden * 2 + 128; // guaranteed > hidden
                let vocab = 1000 + (hidden % 500); // deterministic vocab in [1000, 1499]
                (hidden, n_h, n_kv, d_k, d_ff, vocab)
            })
        })
    })
}
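
// Illustrative example, not emitted by `pv probar`: one concrete configuration,
// loosely modeled on a Llama-2-70B-style GQA layout (the numbers are assumptions
// for illustration), checked against the same constraints the strategy encodes.
#[test]
fn example_concrete_gqa_config() {
    let (hidden, n_h, n_kv, d_k, d_ff, vocab) = (8192usize, 64, 8, 128, 28672, 32000);
    assert_eq!(hidden, n_h * d_k); // Q path spans the full hidden dim
    assert_eq!(n_h % n_kv, 0); // every 8 query heads share one KV head
    assert!(d_ff > hidden); // FFN expands
    assert_eq!(n_kv * d_k, 1024); // K/V projections are 8x narrower than Q
    assert!(vocab > 0);
}
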
proptest! {
    /// Obligation: QKV shape compatibility (invariant)
    /// Formal: Q_dim = n_h * d_k, K_dim = n_kv * d_k, V_dim = n_kv * d_k
    #[test]
    fn prop_qkv_shape_compatibility(
        (hidden, n_h, n_kv, d_k, _d_ff, _vocab) in shape_config_strategy()
    ) {
        let q_dim = n_h * d_k;
        let k_dim = n_kv * d_k;
        let v_dim = n_kv * d_k;
        // Q projection: weight shape [q_dim, hidden], output [q_dim].
        // These checks pin the projection output dims to the formal spec.
        prop_assert_eq!(q_dim, n_h * d_k, "Q output dim mismatch");
        prop_assert_eq!(k_dim, n_kv * d_k, "K output dim mismatch");
        prop_assert_eq!(v_dim, n_kv * d_k, "V output dim mismatch");
        // Each weight is [out_dim, hidden] and is applied as x · Wᵀ,
        // so the matmul is [hidden] × [hidden, out_dim] → [out_dim].
        prop_assert_eq!(hidden, n_h * d_k, "config inconsistent: hidden != n_h * d_k");
    }

    /// Obligation: GQA grouping exact (invariant)
    /// Formal: n_h % n_kv == 0
    #[test]
    fn prop_gqa_grouping_exact(
        (_, n_h, n_kv, _, _, _) in shape_config_strategy()
    ) {
        prop_assert_eq!(
            n_h % n_kv, 0,
            "GQA group size not an integer: n_h={}, n_kv={}", n_h, n_kv
        );
        let group_size = n_h / n_kv;
        prop_assert!(
            group_size >= 1,
            "GQA group size must be >= 1, got {}", group_size
        );
    }
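
    /// Illustrative sketch, not a generated obligation: the standard GQA
    /// head-sharing rule maps query head q to KV head q / group_size
    /// (assuming contiguous grouping); every resulting index must be in range.
    #[test]
    fn prop_gqa_head_mapping_sketch(
        (_, n_h, n_kv, _, _, _) in shape_config_strategy()
    ) {
        let group_size = n_h / n_kv;
        for q_head in 0..n_h {
            let kv_head = q_head / group_size;
            prop_assert!(kv_head < n_kv, "KV head {} out of range for n_kv={}", kv_head, n_kv);
        }
    }
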
    /// Obligation: Residual shape preservation (invariant)
    /// Formal: shape(x + sublayer(x)) == shape(x) == [hidden]
    #[test]
    fn prop_residual_shape_preservation(
        (hidden, n_h, n_kv, d_k, d_ff, _vocab) in shape_config_strategy()
    ) {
        // Attention sublayer: [hidden] → Q/K/V projections → attention → output projection → [hidden]
        let attn_input = hidden;
        let attn_output = n_h * d_k; // concatenation of all head outputs
        // Output projection: weight [hidden, n_h * d_k] maps the concat back to hidden
        let attn_projected = hidden; // after o_proj
        prop_assert_eq!(
            attn_input, attn_projected,
            "attention residual shape mismatch: in={}, out={}", attn_input, attn_projected
        );
        // FFN sublayer: [hidden] → gate/up [d_ff] → down → [hidden]
        let ffn_input = hidden;
        let ffn_output = hidden; // after down projection
        prop_assert_eq!(
            ffn_input, ffn_output,
            "FFN residual shape mismatch: in={}, out={}", ffn_input, ffn_output
        );
        // Both residual connections preserve [hidden]; silence unused dims.
        let _ = (attn_output, d_ff, n_kv);
    }
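
    /// Illustrative sketch, not a generated obligation: thread a dimension
    /// through both sublayers with a toy `project` helper (hypothetical,
    /// local to this test) that models a matmul against a weight [out, in].
    #[test]
    fn prop_block_shape_roundtrip_sketch(
        (hidden, n_h, _n_kv, d_k, d_ff, _) in shape_config_strategy()
    ) {
        // A matmul against weight [w_out, w_in] consumes w_in and yields w_out.
        fn project(input_dim: usize, w_out: usize, w_in: usize) -> Option<usize> {
            (input_dim == w_in).then_some(w_out)
        }
        // Attention: [hidden] → concat heads [n_h * d_k] → o_proj → [hidden]
        let attn = project(hidden, n_h * d_k, hidden)
            .and_then(|concat| project(concat, hidden, n_h * d_k));
        prop_assert_eq!(attn, Some(hidden), "attention path does not round-trip");
        // FFN: [hidden] → gate/up [d_ff] → down → [hidden]
        let ffn = project(hidden, d_ff, hidden)
            .and_then(|mid| project(mid, hidden, d_ff));
        prop_assert_eq!(ffn, Some(hidden), "FFN path does not round-trip");
    }
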
    /// Obligation: SwiGLU intermediate shape (invariant)
    /// Formal: gate/up: [h]→[d_ff], down: [d_ff]→[h]
    #[test]
    fn prop_swiglu_intermediate_shape(
        (hidden, _, _, _, d_ff, _) in shape_config_strategy()
    ) {
        // Gate weight [d_ff, hidden]: projects hidden → d_ff
        let gate_out = d_ff;
        // Up weight [d_ff, hidden]: projects hidden → d_ff
        let up_out = d_ff;
        // SiLU(gate_out) * up_out → [d_ff] (element-wise, same dim)
        prop_assert_eq!(gate_out, up_out, "gate and up output dims must match");
        // Down weight [hidden, d_ff]: projects d_ff → hidden
        let down_out = hidden;
        prop_assert!(
            d_ff > hidden,
            "FFN must expand: d_ff={} <= hidden={}", d_ff, hidden
        );
        prop_assert_eq!(
            down_out, hidden,
            "down projection must restore hidden dim"
        );
    }
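
    /// Illustrative sketch, not a generated obligation: run the SwiGLU
    /// element-wise product on real data to exercise the length contract;
    /// `silu` is a local helper, not a library call.
    #[test]
    fn prop_swiglu_elementwise_sketch(
        v in proptest::collection::vec(-4.0f32..4.0, 1..64usize)
    ) {
        fn silu(x: f32) -> f32 { x / (1.0 + (-x).exp()) }
        let (gate, up) = (v.clone(), v);
        // SiLU(gate) * up is element-wise, so output length equals input length.
        let out: Vec<f32> = gate.iter().zip(up.iter()).map(|(g, u)| silu(*g) * *u).collect();
        prop_assert_eq!(out.len(), gate.len());
    }
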
    /// Obligation: LM head output shape (invariant)
    /// Formal: output_dim == vocab_size
    #[test]
    fn prop_lm_head_output_shape(
        (hidden, _, _, _, _, vocab) in shape_config_strategy()
    ) {
        // LM head weight is [vocab, hidden] and is applied as x · Wᵀ:
        // [hidden] × [hidden, vocab] → [vocab]
        let lm_head_rows = vocab;
        let lm_head_cols = hidden;
        let output_dim = lm_head_rows; // output dimension = vocab
        prop_assert_eq!(
            lm_head_cols, hidden,
            "LM head input dim must match hidden: {} != {}", lm_head_cols, hidden
        );
        prop_assert_eq!(
            output_dim, vocab,
            "LM head output dim must be vocab_size: {} != {}", output_dim, vocab
        );
        prop_assert!(
            vocab > 0,
            "vocab_size must be positive"
        );
    }

    /// Obligation: SIMD shape equivalence (equivalence)
    #[test]
    #[ignore = "SIMD equivalence: trueno domain"]
    fn prop_simd_equivalence(
        _x in proptest::collection::vec(-10.0f32..10.0, 1..32usize)
    ) {
        // Intentionally empty: shape flow is pure integer math, so there is
        // no SIMD variant to compare against.
    }
}
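
// Usage note (assumes this file is compiled as a test target): `cargo test`
// runs the live obligations above; the ignored SIMD placeholder can be
// exercised separately with `cargo test -- --ignored`.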