1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// SHIP-TWO-001 §6 Compound Ship Gates — GATE-SHIP-002 algorithm-level
// PARTIAL discharge.
//
// Spec: docs/specifications/aprender-train/ship-two-models-spec.md §6 row
// `GATE-SHIP-002 | MODEL-2: all 12 AC-SHIP2-* PASS | MODEL-2 publish`.
// Contract: contracts/compound-ship-gates-v1.yaml v1.0.0 PROPOSED
// (FALSIFY-GATE-SHIP-002 — wired in the same PR as this file lands).
//
// GATE-SHIP-002 states that the MODEL-2 sovereign 370M model
// (`paiml/llama-370m-sovereign-apache-v1`) cannot ship unless every
// one of the 12 AC-SHIP2-* acceptance criteria (§4.4 rows AC-SHIP2-001
// through AC-SHIP2-012) reports Pass. The compound gate's decision
// rule is aggregate-AND over the 12 per-AC booleans.
//
// This file discharges the *decision rule* at `PARTIAL_ALGORITHM_LEVEL`:
// given any 12-long boolean array (one per AC-SHIP2 item), the verdict
// is `Pass` iff every AC is `true` AND the array has exactly 12 entries.
// An 11-long input (AC removal) or a 13-long input (AC addition), or
// any single `false` AC, yields `Fail`. The compute-heavy portion
// (actually running all 12 per-AC harnesses against real 370M weights
// on RTX 4090) is intentionally out of scope here.
//
// Twins with GATE-SHIP-001 (MODEL-1 aggregate-AND over 10 ACs);
// differs only in element count (12 vs 10) and the exhaustive bitmask
// proof size (4096 vs 1024). Together they fix the "aggregate-AND over
// N booleans" pattern at two widths for both model publish gates.
/// Number of AC-SHIP2-* acceptance criteria that MODEL-2 must pass for
/// GATE-SHIP-002 to Pass.
///
/// Derivation: `docs/specifications/aprender-train/ship-two-models-spec.md`
/// §4.4 enumerates 12 rows AC-SHIP2-001 through AC-SHIP2-012
/// (architecture scaffold / tokenizer build / corpus ingest /
/// checkpoint cadence / pretrain loop / decode tok/s / `apr qa` /
/// HumanEval / cosine parity / decode floor / chat template /
/// provenance). The literal integer 12 is bound here so that drift
/// in either direction (adding AC-SHIP2-013 without updating
/// GATE-SHIP-002; removing an AC-SHIP2-* without falsifying this
/// test) is caught at compile+test time.
pub const AC_GATE_SHIP_002_MODEL_2_AC_COUNT: usize = 12;
/// Binary verdict for FALSIFY-GATE-SHIP-002 / GATE-SHIP-002.
/// `Pass` iff every AC in the input slice is `true` AND the slice has
/// exactly `AC_GATE_SHIP_002_MODEL_2_AC_COUNT` entries. `Fail`
/// otherwise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GateShip002Verdict {
/// All 12 AC-SHIP2-* items reported Pass. The MODEL-2 sovereign
/// has cleared every §4.4 acceptance criterion and may proceed
/// to publish (subject to GATE-SHIP-003..006 cross-cutting gates).
Pass,
/// At least one of: slice length ≠ 12; any AC reported `false`.
/// MODEL-2 publish is blocked.
Fail,
}
/// Algorithm-level verdict rule for FALSIFY-GATE-SHIP-002 /
/// GATE-SHIP-002: aggregate-AND over the 12 AC-SHIP2-* booleans.
///
/// # Examples
///
/// ```
/// use aprender::format::gate_ship_002::{
/// verdict_from_model2_ac_aggregate, GateShip002Verdict,
/// };
///
/// // Happy path: all 12 AC-SHIP2-* items Pass.
/// let all_pass = [true; 12];
/// assert_eq!(
/// verdict_from_model2_ac_aggregate(&all_pass),
/// GateShip002Verdict::Pass
/// );
///
/// // Any length ≠ 12 Fails.
/// assert_eq!(
/// verdict_from_model2_ac_aggregate(&[true; 11]),
/// GateShip002Verdict::Fail
/// );
/// ```
#[must_use]
pub const fn verdict_from_model2_ac_aggregate(ac_passes: &[bool]) -> GateShip002Verdict {
if ac_passes.len() != AC_GATE_SHIP_002_MODEL_2_AC_COUNT {
return GateShip002Verdict::Fail;
}
let mut i = 0;
while i < ac_passes.len() {
if !ac_passes[i] {
return GateShip002Verdict::Fail;
}
i += 1;
}
GateShip002Verdict::Pass
}
// ─────────────────────────────────────────────────────────────
// Unit tests — FALSIFY-GATE-SHIP-002 algorithm-level proof
// ─────────────────────────────────────────────────────────────
#[cfg(test)]
mod gate_ship_002_tests {
use super::*;
/// FALSIFY-GATE-SHIP-002 algorithm-level PARTIAL discharge: prove
/// the aggregate-AND decision rule binding the 12 AC-SHIP2-* items
/// to the MODEL-2 publish criterion.
#[test]
fn falsify_gate_ship_002_model2_ac_aggregate() {
// Section 1: all-Pass 12-long array yields Pass.
let all_pass = [true; 12];
assert_eq!(
verdict_from_model2_ac_aggregate(&all_pass),
GateShip002Verdict::Pass,
"all 12 AC-SHIP2 items Pass must yield aggregate Pass",
);
// Section 2: single-AC-flip — each of the 12 positions flipped
// to Fail individually must cause the aggregate to Fail. This
// enforces the AND shape (not OR, not majority, not 11-of-12).
for flip_idx in 0..12 {
let mut acs = [true; 12];
acs[flip_idx] = false;
assert_eq!(
verdict_from_model2_ac_aggregate(&acs),
GateShip002Verdict::Fail,
"flipping AC-SHIP2-{:03} to Fail must break the aggregate",
flip_idx + 1,
);
}
// Section 3: length-drift counter-examples.
// 3a: 0-length empty → Fail.
assert_eq!(
verdict_from_model2_ac_aggregate(&[]),
GateShip002Verdict::Fail,
"empty AC array must Fail — contract-drift guard",
);
// 3b: 11-long all-true → Fail (AC removed without spec update).
assert_eq!(
verdict_from_model2_ac_aggregate(&[true; 11]),
GateShip002Verdict::Fail,
"11-long all-true must Fail — AC-removal drift guard",
);
// 3c: 13-long all-true → Fail (AC added without spec update).
assert_eq!(
verdict_from_model2_ac_aggregate(&[true; 13]),
GateShip002Verdict::Fail,
"13-long all-true must Fail — AC-addition drift guard",
);
// 3d: 24-long all-true → Fail. Double-count guard (e.g.,
// accidental concatenation of MODEL-1 and MODEL-2 ACs, or
// double-pass concat).
assert_eq!(
verdict_from_model2_ac_aggregate(&[true; 24]),
GateShip002Verdict::Fail,
"24-long all-true must Fail — double-count guard",
);
// Section 4: exhaustive 2^12=4096-combination proof. The
// aggregate is Pass iff the bitmask is all-ones (0xFFF).
// Every other mask must yield Fail.
for mask in 0u16..4096 {
let acs: [bool; 12] = core::array::from_fn(|i| (mask >> i) & 1 == 1);
let expected = if mask == 0xFFF {
GateShip002Verdict::Pass
} else {
GateShip002Verdict::Fail
};
assert_eq!(
verdict_from_model2_ac_aggregate(&acs),
expected,
"mask=0b{mask:012b} expected {expected:?}",
);
}
// Section 5: monotonicity — once Pass, any flip Pass→Fail.
let pass_state = [true; 12];
for flip_idx in 0..12 {
let mut mutated = pass_state;
mutated[flip_idx] = false;
assert_ne!(
verdict_from_model2_ac_aggregate(&mutated),
verdict_from_model2_ac_aggregate(&pass_state),
"Pass→Fail monotonicity broken at idx {flip_idx}",
);
}
// Section 6: provenance pin — the required AC count is load-
// bearing and lockstepped with the spec §4.4. If the table
// ever grows to 13 rows or shrinks to 11, this constant (and
// this test) must move together.
assert_eq!(
AC_GATE_SHIP_002_MODEL_2_AC_COUNT, 12,
"MODEL-2 compound ship gate requires exactly 12 AC-SHIP2 items \
(spec §4.4 AC-SHIP2-001 through AC-SHIP2-012; §6 GATE-SHIP-002)",
);
}
}