1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
// SHIP-TWO-001 §35 — `apr-cli-distill-train-v1` algorithm-level
// PARTIAL discharge for FALSIFY-APR-DISTILL-TRAIN-001.
//
// Contract: `contracts/apr-cli-distill-train-v1.yaml` v1.0.0 PROPOSED.
// Spec: `docs/specifications/aprender-train/ship-two-models-spec.md` §35
// (`apr distill` Standard strategy is currently a stub at distill.rs:1464,
// just `tensor_clone()`, no gradient training).
//
// ## What FALSIFY-APR-DISTILL-TRAIN-001 says
//
// rule: real training (not stub) — student tensors differ post-train
// prediction: After `apr distill --stage train`, at least one tensor
// in `student.apr` differs from input student by >Q4K tolerance.
//
// Today this FAILS — distill is `tensor_clone()` so all max_diffs are 0.
// Once §35.3 implementation lands (KL+CE training loop), the live test
// passes and this contract flips to ACTIVE.
//
// ## What this file proves NOW (`PARTIAL_ALGORITHM_LEVEL`)
//
// The decision rule — "at least one tensor's max_diff% exceeds Q4K
// tolerance" — is pinned. A future implementation cannot regress it
// silently. The mutation survey covers the empty-input, all-zero
// (stub), one-large, all-small, just-at-boundary, non-finite, and
// negative-diff cases.
/// Q4K-tolerance threshold, expressed in percent, per CLAUDE.md (±5% element-wise).
///
/// A real-training pass MUST produce at least one tensor whose
/// `max(|new - input|) / max(|input|) * 100` exceeds this threshold.
/// The comparison made by `verdict_from_max_diff_pct` is strict (`>`), so
/// a diff of exactly `5.0` does not count as exceeding it.
///
/// Stub `tensor_clone()` behaviour produces all-zero diffs and so
/// fails this gate — exactly the §35 finding.
pub const AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT: f32 = 5.0;
/// Two-valued outcome of the `FALSIFY-APR-DISTILL-TRAIN-001` gate.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DistillTrain001Verdict {
    /// The gate is satisfied: some tensor's `max_diff%` is strictly above
    /// `AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT` (5.0), and no input was
    /// rejected as malformed. Real gradient-based training has flowed to
    /// weight updates; student.apr is no longer a metadata-only clone of
    /// the input.
    Pass,
    /// The gate is not satisfied. Any one of these is sufficient:
    ///
    /// - The `max_diffs_pct` slice is empty (no tensors compared — caller
    ///   error).
    /// - Some diff is non-finite (NaN, ±∞).
    /// - Some diff is negative (a percentage cannot be negative; treated
    ///   conservatively as `Fail` — implies a buggy `apr diff --values`).
    /// - **Every** diff is ≤ 5% — the stub-behaviour signature: the
    ///   shipped distill loop just cloned tensors, never updated weights.
    Fail,
}
/// Decides the FALSIFY-APR-DISTILL-TRAIN-001 verdict from per-tensor diffs.
///
/// `max_diffs_pct` holds one `max(|new - input|) / max(|input|) * 100`
/// percentage per tensor in the student.apr (e.g., one per
/// `apr diff --values` line). Typical Qwen2.5-Coder-7B has 339 tensors.
///
/// The result is [`DistillTrain001Verdict::Pass`] only when all of the
/// following hold, and [`DistillTrain001Verdict::Fail`] otherwise:
///
/// - the slice is non-empty;
/// - every entry is finite and not below zero;
/// - at least one entry is strictly greater than
///   [`AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT`].
///
/// The function is pure: it only reads the slice.
///
/// # Examples
///
/// Stub (`tensor_clone()`) — all zeros — is `Fail`:
/// ```
/// use aprender::format::distill_train_001::{
///     verdict_from_max_diff_pct, DistillTrain001Verdict,
/// };
/// let stub_diffs = vec![0.0_f32; 339];
/// assert_eq!(
///     verdict_from_max_diff_pct(&stub_diffs),
///     DistillTrain001Verdict::Fail,
/// );
/// ```
///
/// Real training — at least one tensor moved by >5% — is `Pass`:
/// ```
/// use aprender::format::distill_train_001::{
///     verdict_from_max_diff_pct, DistillTrain001Verdict,
/// };
/// let mut diffs = vec![0.5_f32; 339];
/// diffs[100] = 12.4; // one tensor moved 12.4%
/// assert_eq!(
///     verdict_from_max_diff_pct(&diffs),
///     DistillTrain001Verdict::Pass,
/// );
/// ```
#[must_use]
pub fn verdict_from_max_diff_pct(max_diffs_pct: &[f32]) -> DistillTrain001Verdict {
    // A usable percentage is finite and not below zero; NaN and ±∞ are
    // rejected by `is_finite`, so `>= 0.0` only ever sees ordinary numbers.
    let in_domain = |pct: &f32| pct.is_finite() && *pct >= 0.0;

    // An empty slice means nothing was compared, which is never a pass.
    let well_formed = !max_diffs_pct.is_empty() && max_diffs_pct.iter().all(in_domain);

    // Strict `>`: a diff sitting exactly on the tolerance does not count.
    let some_tensor_moved = max_diffs_pct
        .iter()
        .any(|&pct| pct > AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT);

    if well_formed && some_tensor_moved {
        DistillTrain001Verdict::Pass
    } else {
        DistillTrain001Verdict::Fail
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Length of every fixture: 339 = canonical Qwen2.5-Coder-7B tensor count.
    const TENSOR_COUNT: usize = 339;

    /// Next representable `f32` after `x`, obtained by bumping the bit
    /// pattern by one. Only used here on the positive finite threshold.
    fn next_up_f32(x: f32) -> f32 {
        let bits = x.to_bits();
        f32::from_bits(bits + 1)
    }

    /// Verdict for a fixture in which every tensor reports the same diff.
    fn uniform_verdict(pct: f32) -> DistillTrain001Verdict {
        verdict_from_max_diff_pct(&[pct; TENSOR_COUNT])
    }

    /// Verdict for a fixture that is `base` everywhere except for `value`
    /// placed at `index`.
    fn outlier_verdict(base: f32, index: usize, value: f32) -> DistillTrain001Verdict {
        let mut diffs = [base; TENSOR_COUNT];
        diffs[index] = value;
        verdict_from_max_diff_pct(&diffs)
    }

    // ---------------------------------------------------------------------
    // Section 1: Provenance pin — threshold matches CLAUDE.md ±5% Q4K spec.
    // ---------------------------------------------------------------------

    #[test]
    fn provenance_q4k_tolerance_is_five_percent() {
        assert_eq!(AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT, 5.0);
    }

    // ---------------------------------------------------------------------
    // Section 2: §35 stub-behaviour signature — `tensor_clone()` produces
    // all-zero max_diffs → Fail (catches future regression to stub).
    // ---------------------------------------------------------------------

    #[test]
    fn fail_all_zeros_is_stub_signature() {
        assert_eq!(
            uniform_verdict(0.0),
            DistillTrain001Verdict::Fail,
            "all-zero max_diffs is the §35 tensor_clone stub signature; must Fail"
        );
    }

    #[test]
    fn fail_all_below_threshold() {
        // Small noisy diffs (e.g. from a quantization round-trip) are not
        // sufficient evidence of real training.
        assert_eq!(uniform_verdict(1.0), DistillTrain001Verdict::Fail);
    }

    #[test]
    fn fail_all_at_or_below_boundary() {
        // The comparison is strict (`>`), so sitting exactly on the
        // threshold everywhere is still a stub-shaped Fail.
        assert_eq!(
            uniform_verdict(AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT),
            DistillTrain001Verdict::Fail,
            "exact 5.0 must Fail (strict `>` on threshold)"
        );
    }

    // ---------------------------------------------------------------------
    // Section 3: Pass band — at least one tensor moved by > Q4K tolerance.
    // ---------------------------------------------------------------------

    #[test]
    fn pass_one_large_move_among_many_small() {
        assert_eq!(
            outlier_verdict(0.1, 42, 12.4),
            DistillTrain001Verdict::Pass
        );
    }

    #[test]
    fn pass_just_above_threshold() {
        let just_above = next_up_f32(AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT);
        assert!(just_above > AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT);
        assert_eq!(
            outlier_verdict(0.0, 100, just_above),
            DistillTrain001Verdict::Pass,
            "5.0 + 1 ULP must Pass (strict `>` boundary)"
        );
    }

    #[test]
    fn pass_all_above_threshold() {
        assert_eq!(uniform_verdict(15.0), DistillTrain001Verdict::Pass);
    }

    // ---------------------------------------------------------------------
    // Section 4: Empty-input — caller error → conservative Fail.
    // ---------------------------------------------------------------------

    #[test]
    fn fail_empty_input() {
        let no_tensors: [f32; 0] = [];
        assert_eq!(
            verdict_from_max_diff_pct(&no_tensors),
            DistillTrain001Verdict::Fail,
            "empty max_diffs implies no tensors compared — caller error"
        );
    }

    // ---------------------------------------------------------------------
    // Section 5: Domain violation — non-finite or negative percentages Fail.
    // ---------------------------------------------------------------------

    #[test]
    fn fail_nan_in_any_position() {
        // First, middle and last slots; every other entry would pass alone.
        for pos in [0_usize, 50, 338] {
            assert_eq!(
                outlier_verdict(10.0, pos, f32::NAN),
                DistillTrain001Verdict::Fail,
                "NaN at position {pos} must Fail (domain violation)"
            );
        }
    }

    #[test]
    fn fail_positive_infinity() {
        assert_eq!(
            outlier_verdict(10.0, 7, f32::INFINITY),
            DistillTrain001Verdict::Fail
        );
    }

    #[test]
    fn fail_negative_infinity() {
        assert_eq!(
            outlier_verdict(10.0, 7, f32::NEG_INFINITY),
            DistillTrain001Verdict::Fail
        );
    }

    #[test]
    fn fail_negative_diff_is_domain_violation() {
        // A percentage cannot be negative; a negative max_diff coming out
        // of `apr diff --values` implies a tooling bug, not real training.
        assert_eq!(
            outlier_verdict(10.0, 7, -1.0),
            DistillTrain001Verdict::Fail
        );
    }

    // ---------------------------------------------------------------------
    // Section 6: Single-tensor sweep — flipping any one tensor above
    // threshold flips the whole verdict to Pass.
    // ---------------------------------------------------------------------

    #[test]
    fn single_tensor_above_threshold_flips_to_pass_at_each_index() {
        for i in [0_usize, 1, 100, 169, 338] {
            assert_eq!(
                outlier_verdict(0.5, i, 50.0),
                DistillTrain001Verdict::Pass,
                "single tensor above threshold at index {i} must Pass"
            );
        }
    }

    // ---------------------------------------------------------------------
    // Section 7: Monotonicity sweep at uniform diff.
    // ---------------------------------------------------------------------

    #[test]
    fn monotonicity_sweep_uniform_diff() {
        let threshold = AC_DISTILL_TRAIN_001_Q4K_TOLERANCE_PCT;
        // Ascending probes: the verdict flips exactly once, just above the
        // threshold.
        let probes = [
            (0.0, DistillTrain001Verdict::Fail),
            (0.5, DistillTrain001Verdict::Fail),
            (4.999, DistillTrain001Verdict::Fail),
            (threshold, DistillTrain001Verdict::Fail),
            (next_up_f32(threshold), DistillTrain001Verdict::Pass),
            (5.001, DistillTrain001Verdict::Pass),
            (10.0, DistillTrain001Verdict::Pass),
            (100.0, DistillTrain001Verdict::Pass),
        ];
        for (d, expected) in probes {
            assert_eq!(
                uniform_verdict(d),
                expected,
                "uniform diff {d} expected {expected:?}"
            );
        }
    }
}