#![allow(clippy::doc_overindented_list_items)]
#![allow(dead_code)]
use std::marker::PhantomData;
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct HiddenDim<const N: usize>(PhantomData<()>);

impl<const N: usize> HiddenDim<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        HiddenDim::<N>(PhantomData)
    }
}
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct NumHeads<const N: usize>(PhantomData<()>);

impl<const N: usize> NumHeads<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        NumHeads::<N>(PhantomData)
    }
}
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct NumKvHeads<const N: usize>(PhantomData<()>);

impl<const N: usize> NumKvHeads<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        NumKvHeads::<N>(PhantomData)
    }
}
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct HeadDim<const N: usize>(PhantomData<()>);

impl<const N: usize> HeadDim<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        HeadDim::<N>(PhantomData)
    }
}
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct IntermediateDim<const N: usize>(PhantomData<()>);

impl<const N: usize> IntermediateDim<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        IntermediateDim::<N>(PhantomData)
    }
}
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct NumLayers<const N: usize>(PhantomData<()>);

impl<const N: usize> NumLayers<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        NumLayers::<N>(PhantomData)
    }
}
/// Marker type tagged with the const-generic value `N`; it holds no runtime data.
#[derive(Debug, Clone, Copy, Default)]
pub struct VocabSize<const N: usize>(PhantomData<()>);

impl<const N: usize> VocabSize<N> {
    /// The const-generic value `N`, readable without constructing an instance.
    pub const VALUE: usize = N;

    /// Creates the marker value; callable in `const` contexts.
    pub const fn new() -> Self {
        VocabSize::<N>(PhantomData)
    }
}
/// Namespace for the Llama 370M architecture constants and a `const`
/// self-check of the invariants that relate them.
pub struct Llama370MConfig;

impl Llama370MConfig {
    // Parameter-count budget: nominal target plus the accepted min/max band.
    pub const PARAMETERS_NOMINAL: usize = 370_000_000;
    pub const PARAMETERS_MIN: usize = 366_000_000;
    pub const PARAMETERS_MAX: usize = 374_000_000;

    // Model shape.
    pub const HIDDEN_DIM: usize = 1024;
    pub const NUM_LAYERS: usize = 24;
    pub const NUM_HEADS: usize = 16;
    pub const NUM_KV_HEADS: usize = 4;
    pub const HEAD_DIM: usize = 64;
    pub const INTERMEDIATE_DIM: usize = 2816;
    pub const VOCAB_SIZE: usize = 50_257;
    pub const MAX_POSITION_EMBEDDINGS: usize = 4096;

    // Numeric hyper-parameters.
    pub const ROPE_THETA: f32 = 10_000.0;
    pub const RMS_NORM_EPS: f32 = 1.0e-5;

    // Structural flags.
    pub const TIED_EMBEDDINGS: bool = true;
    pub const HAS_BIAS: bool = false;

    /// Checks the invariants that tie the constants above together.
    ///
    /// # Panics
    ///
    /// Panics with the matching invariant message if any check fails. When
    /// evaluated in a const context the panic becomes a compile error.
    pub const fn validate() {
        let heads = Self::NUM_HEADS;
        let kv_heads = Self::NUM_KV_HEADS;
        let head_dim = Self::HEAD_DIM;
        let hidden = Self::HIDDEN_DIM;
        let max_positions = Self::MAX_POSITION_EMBEDDINGS;

        assert!(
            heads * head_dim == hidden,
            "INV-ARCH-370M-002 violated: num_heads * head_dim != hidden_dim",
        );
        assert!(
            kv_heads > 0 && heads % kv_heads == 0,
            "INV-ARCH-370M-003 violated: num_kv_heads does not divide num_heads",
        );
        assert!(
            Self::TIED_EMBEDDINGS,
            "INV-ARCH-370M-004 violated: tied_embeddings must be true for 370M",
        );
        assert!(
            Self::ROPE_THETA == 10_000.0_f32,
            "INV-ARCH-370M-005 violated: rope_theta must be exactly 10000.0",
        );
        assert!(
            Self::VOCAB_SIZE == 50_257,
            "INV-ARCH-370M-006 violated: vocab_size must equal 50_257",
        );
        assert!(
            !Self::HAS_BIAS,
            "INV-ARCH-370M-008 violated: has_bias must be false (Llama convention)",
        );
        // Same relation as the first check, stated through division.
        assert!(
            hidden / heads == head_dim,
            "hidden_dim / num_heads != head_dim — config internally inconsistent",
        );
        assert!(
            max_positions > 0 && max_positions % 2 == 0,
            "max_position_embeddings must be a positive even integer for RoPE",
        );
    }
}
// Evaluate `Llama370MConfig::validate()` in a const context so that a
// violated invariant fails the build instead of surfacing at runtime.
#[allow(clippy::let_unit_value)]
const _: () = Llama370MConfig::validate();
/// Returns the stored parameter estimate plus one additional
/// `VOCAB_SIZE * HIDDEN_DIM` matrix.
#[must_use]
pub const fn estimated_param_count() -> usize {
    let extra_vocab_matrix = Llama370MConfig::VOCAB_SIZE * Llama370MConfig::HIDDEN_DIM;
    estimated_stored_param_count() + extra_vocab_matrix
}
/// Estimates the parameter count from the `Llama370MConfig` constants:
/// one embedding matrix, `NUM_LAYERS` identical layers (attention
/// projections, three MLP matrices, two norm vectors) and a final norm vector.
#[must_use]
pub const fn estimated_stored_param_count() -> usize {
    let hidden = Llama370MConfig::HIDDEN_DIM;
    let inter = Llama370MConfig::INTERMEDIATE_DIM;
    let q_width = Llama370MConfig::NUM_HEADS * Llama370MConfig::HEAD_DIM;
    let kv_width = Llama370MConfig::NUM_KV_HEADS * Llama370MConfig::HEAD_DIM;

    let embedding = Llama370MConfig::VOCAB_SIZE * hidden;

    // Attention: q and o use the full head width, k and v the kv-head width.
    let attention = hidden * q_width + 2 * (hidden * kv_width) + q_width * hidden;
    // MLP: three matrices of hidden × intermediate elements each.
    let mlp = 3 * (hidden * inter);
    // Two norm vectors per layer.
    let layer_norms = 2 * hidden;
    let per_layer = attention + mlp + layer_norms;

    // The trailing `hidden` term is the final norm vector.
    embedding + Llama370MConfig::NUM_LAYERS * per_layer + hidden
}
/// Strict vocabulary gate: succeeds only when both sizes are equal.
///
/// # Errors
///
/// Returns a message naming the gate and both sizes when
/// `tokenizer_vocab_size != model_vocab_size`.
pub fn assert_tokenizer_vocab_matches_model(
    tokenizer_vocab_size: usize,
    model_vocab_size: usize,
) -> Result<(), String> {
    if tokenizer_vocab_size != model_vocab_size {
        return Err(format!(
            "GATE-ARCH-370M-011 (INV-ARCH-370M-006) violated: \
             tokenizer vocab_size ({tokenizer_vocab_size}) != model vocab_size \
             ({model_vocab_size}). See contracts/model-families/llama-370m-sovereign-v1.yaml \
             and contracts/tokenizer-bpe-v1.yaml — retrain the tokenizer or amend both contracts \
             in lockstep before resuming pretraining."
        ));
    }
    Ok(())
}
/// Relaxed vocabulary gate: succeeds when the tokenizer size does not exceed
/// the model size.
///
/// # Errors
///
/// Returns a message naming the gate and both sizes when
/// `tokenizer_vocab_size > model_vocab_size`.
pub fn assert_tokenizer_vocab_within_model_bound(
    tokenizer_vocab_size: usize,
    model_vocab_size: usize,
) -> Result<(), String> {
    if tokenizer_vocab_size > model_vocab_size {
        Err(format!(
            "GATE-ARCH-370M-011 (INV-ARCH-370M-006-RELAXED) violated: \
             tokenizer vocab_size ({tokenizer_vocab_size}) > model vocab_size \
             ({model_vocab_size}). See contracts/apr-pretrain-arch-polymorphic-v1.yaml \
             §qwen_tokenizer_vocab_compatibility — for HF-distributed pretrained \
             checkpoints, tokenizer_vocab MUST be <= model_vocab (reserved-slot \
             tolerance); a tokenizer with MORE strings than the model expects \
             would emit OOB ids → N-09 escape → silent garbage gradients."
        ))
    } else {
        Ok(())
    }
}
/// Number of held-out prompts in the AC-SHIP2-007 harness.
pub const AC_SHIP2_007_HELDOUT_PROMPT_COUNT: usize = 100;
/// Largest syntax-error count that still yields `Ship017Verdict::Pass`.
pub const AC_SHIP2_007_MAX_TOLERATED_SYNTAX_ERRORS: usize = 1;

/// Outcome of the syntax-error-count gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ship017Verdict {
    Pass,
    Fail,
}

/// Maps a syntax-error count to a verdict: `Pass` when the count is at most
/// `AC_SHIP2_007_MAX_TOLERATED_SYNTAX_ERRORS`, `Fail` otherwise.
// `#[must_use]` matches the other verdict functions in this file; dropping a
// gate verdict on the floor is always a mistake.
#[must_use]
pub const fn verdict_from_syntax_error_count(syntax_errors: usize) -> Ship017Verdict {
    if syntax_errors <= AC_SHIP2_007_MAX_TOLERATED_SYNTAX_ERRORS {
        Ship017Verdict::Pass
    } else {
        Ship017Verdict::Fail
    }
}
/// Minimum decode throughput (tokens per second) required to pass.
pub const AC_SHIP2_010_MIN_DECODE_TPS_RTX4090: f32 = 100.0;

/// Outcome of the decode-throughput gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ship020Verdict {
    Pass,
    Fail,
}

/// Returns `Pass` when `measured_tps` is finite and at least
/// `AC_SHIP2_010_MIN_DECODE_TPS_RTX4090`; NaN and both infinities fail.
#[must_use]
pub fn verdict_from_decode_tps(measured_tps: f32) -> Ship020Verdict {
    let meets_floor =
        measured_tps.is_finite() && measured_tps >= AC_SHIP2_010_MIN_DECODE_TPS_RTX4090;
    if meets_floor {
        Ship020Verdict::Pass
    } else {
        Ship020Verdict::Fail
    }
}
/// Minimum pass@1 percentage required to pass.
pub const AC_SHIP2_008_MIN_HUMANEVAL_PASS_AT_1_PCT: f32 = 30.0;

/// Outcome of the pass@1 gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ship018Verdict {
    Pass,
    Fail,
}

/// Returns `Pass` when `correct / total`, expressed as a percentage, is at
/// least `threshold_pct`.
///
/// Fails closed on malformed input: `total == 0`, `correct > total`, or a
/// non-finite `threshold_pct` all yield `Fail`.
#[must_use]
pub fn verdict_from_pass_at_1(correct: usize, total: usize, threshold_pct: f32) -> Ship018Verdict {
    if total == 0 || correct > total || !threshold_pct.is_finite() {
        return Ship018Verdict::Fail;
    }
    // Compare `correct * 100 >= threshold * total` in f64 rather than forming
    // the ratio in f32. `usize as f32` rounds counts above 2^24, which can lift
    // a just-below-threshold run to Pass, and the divide/multiply adds rounding
    // of its own. The f64 casts are exact for counts up to 2^53.
    let achieved = correct as f64 * 100.0;
    let required = f64::from(threshold_pct) * total as f64;
    if achieved >= required {
        Ship018Verdict::Pass
    } else {
        Ship018Verdict::Fail
    }
}
/// Exact number of gate results the aggregate verdict requires.
pub const AC_SHIP2_006_REQUIRED_QA_GATE_COUNT: usize = 8;

/// Outcome of the aggregated QA gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ship016Verdict {
    Pass,
    Fail,
}

/// Returns `Pass` only when `gate_results` holds exactly
/// `AC_SHIP2_006_REQUIRED_QA_GATE_COUNT` entries and every entry is `true`.
#[must_use]
pub fn verdict_from_qa_gates(gate_results: &[bool]) -> Ship016Verdict {
    let has_required_count = gate_results.len() == AC_SHIP2_006_REQUIRED_QA_GATE_COUNT;
    let any_gate_failed = gate_results.contains(&false);
    match (has_required_count, any_gate_failed) {
        (true, false) => Ship016Verdict::Pass,
        _ => Ship016Verdict::Fail,
    }
}
/// Largest validation cross-entropy loss that still passes.
pub const AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS: f32 = 2.2;

/// Outcome of the validation-loss gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ship013Verdict {
    Pass,
    Fail,
}

/// Returns `Pass` when `measured_ce` is finite, not negative, and at most
/// `AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS`; everything else fails.
#[must_use]
pub const fn verdict_from_val_ce_loss(measured_ce: f32) -> Ship013Verdict {
    let well_formed = measured_ce.is_finite() && measured_ce >= 0.0;
    if well_formed && measured_ce <= AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS {
        Ship013Verdict::Pass
    } else {
        Ship013Verdict::Fail
    }
}
/// Longest training duration, in days, that still passes.
pub const AC_SHIP2_004_MAX_TRAINING_DURATION_DAYS: u32 = 21;

/// Outcome of the training-duration gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ship014Verdict {
    Pass,
    Fail,
}

/// Returns `Fail` when `measured_days` exceeds
/// `AC_SHIP2_004_MAX_TRAINING_DURATION_DAYS`, `Pass` otherwise.
#[must_use]
pub const fn verdict_from_training_duration_days(measured_days: u32) -> Ship014Verdict {
    if measured_days > AC_SHIP2_004_MAX_TRAINING_DURATION_DAYS {
        return Ship014Verdict::Fail;
    }
    Ship014Verdict::Pass
}
#[cfg(test)]
mod tests {
use super::*;
/// Pins every `Llama370MConfig` constant to its expected literal value.
#[test]
fn config_matches_contract_values() {
    type Cfg = Llama370MConfig;

    // Integer shape constants.
    assert_eq!(Cfg::HIDDEN_DIM, 1024);
    assert_eq!(Cfg::NUM_LAYERS, 24);
    assert_eq!(Cfg::NUM_HEADS, 16);
    assert_eq!(Cfg::NUM_KV_HEADS, 4);
    assert_eq!(Cfg::HEAD_DIM, 64);
    assert_eq!(Cfg::INTERMEDIATE_DIM, 2816);
    assert_eq!(Cfg::VOCAB_SIZE, 50_257);
    assert_eq!(Cfg::MAX_POSITION_EMBEDDINGS, 4096);

    // Float constants, compared with a small absolute tolerance.
    let rope_theta_gap = (Cfg::ROPE_THETA - 10_000.0_f32).abs();
    assert!(rope_theta_gap < 1e-6);
    let rms_eps_gap = (Cfg::RMS_NORM_EPS - 1.0e-5_f32).abs();
    assert!(rms_eps_gap < 1e-9);

    // Structural flags.
    assert!(Cfg::TIED_EMBEDDINGS);
    assert!(!Cfg::HAS_BIAS);

    // Cross-field relations.
    assert_eq!(Cfg::NUM_HEADS * Cfg::HEAD_DIM, Cfg::HIDDEN_DIM);
    assert_eq!(Cfg::NUM_HEADS % Cfg::NUM_KV_HEADS, 0);
}
/// The strict vocab gate accepts equal sizes and rejects any mismatch with a
/// message that names the gate and both sizes.
#[test]
fn falsify_gate_arch_370m_011_helper_rejects_mismatch() {
    let model_vocab = Llama370MConfig::VOCAB_SIZE;

    // Equal sizes are accepted.
    assert!(assert_tokenizer_vocab_matches_model(model_vocab, model_vocab).is_ok());

    // A tokenizer one entry short is rejected, with forensic detail.
    let mismatch = model_vocab - 1;
    let err = assert_tokenizer_vocab_matches_model(mismatch, model_vocab)
        .expect_err("mismatch must return Err");
    let names_gate = err.contains("GATE-ARCH-370M-011");
    let names_tokenizer_size = err.contains(&mismatch.to_string());
    let names_model_size = err.contains(&model_vocab.to_string());
    assert!(
        names_gate && names_tokenizer_size && names_model_size,
        "error must name the gate and both vocab sizes for forensics, got: {err}",
    );

    // Tiny values and an oversized tokenizer are rejected too.
    assert!(assert_tokenizer_vocab_matches_model(0, 1).is_err());
    assert!(assert_tokenizer_vocab_matches_model(model_vocab + 1, model_vocab).is_err());
}
/// The relaxed vocab gate accepts any tokenizer size that is at most the
/// model size, including exact equality.
#[test]
fn falsify_apr_pretrain_arch_009_relaxed_bound_accepts_qwen_reserved_slots() {
    const QWEN_DECLARED_VOCAB: usize = 151_936;
    const QWEN_BPE_PLUS_ADDED: usize = 151_665;
    const QWEN_BPE_ONLY: usize = 151_643;

    let with_added_tokens =
        assert_tokenizer_vocab_within_model_bound(QWEN_BPE_PLUS_ADDED, QWEN_DECLARED_VOCAB);
    assert!(
        with_added_tokens.is_ok(),
        "FALSIFY-APR-PRETRAIN-ARCH-009: tokenizer 151665 ≤ model 151936 MUST pass relaxed bound \
         (HF reserved-slot tolerance)"
    );

    let bpe_only = assert_tokenizer_vocab_within_model_bound(QWEN_BPE_ONLY, QWEN_DECLARED_VOCAB);
    assert!(bpe_only.is_ok());

    // Equality sits on the boundary and must still pass.
    let model_vocab = Llama370MConfig::VOCAB_SIZE;
    assert!(assert_tokenizer_vocab_within_model_bound(model_vocab, model_vocab).is_ok());
}
/// The relaxed vocab gate rejects a tokenizer larger than the model and the
/// error names the invariant, the risk and both sizes.
#[test]
fn falsify_apr_pretrain_arch_010_relaxed_bound_rejects_oversized_tokenizer() {
    const QWEN_DECLARED_VOCAB: usize = 151_936;
    let oversized = QWEN_DECLARED_VOCAB + 1;

    let err = assert_tokenizer_vocab_within_model_bound(oversized, QWEN_DECLARED_VOCAB)
        .expect_err("FALSIFY-APR-PRETRAIN-ARCH-010: tokenizer > model MUST fail-fast");

    let cites_invariant_and_risk = err.contains("RELAXED") && err.contains("OOB");
    assert!(
        cites_invariant_and_risk,
        "error must cite the relaxed-mode invariant + OOB risk: {err}"
    );

    let names_both_sizes =
        err.contains(&oversized.to_string()) && err.contains(&QWEN_DECLARED_VOCAB.to_string());
    assert!(names_both_sizes, "error must name both sizes for forensics: {err}");
}
/// The estimated parameter count must sit inside the configured min/max band,
/// within 5% of the nominal value, and differ from the stored count by exactly
/// one `VOCAB_SIZE * HIDDEN_DIM` matrix.
#[test]
fn estimated_param_count_within_contract_band() {
    type Cfg = Llama370MConfig;

    let (p, stored) = (estimated_param_count(), estimated_stored_param_count());
    eprintln!("albor-370m nominal param count = {p} ({} M)", p / 1_000_000,);
    eprintln!(
        "albor-370m stored param count = {stored} ({} M, lm_head tied)",
        stored / 1_000_000,
    );

    // Absolute band.
    assert!(
        p >= Cfg::PARAMETERS_MIN,
        "nominal param count {p} below INV-ARCH-370M-001 floor (366M)",
    );
    assert!(
        p <= Cfg::PARAMETERS_MAX,
        "nominal param count {p} above INV-ARCH-370M-001 ceiling (374M)",
    );

    // Relative distance from the nominal target.
    let target = Cfg::PARAMETERS_NOMINAL as f64;
    let relative_gap = (p as f64 - target).abs() / target;
    assert!(
        relative_gap < 0.05,
        "nominal param count {p} differs from 370M by {:.2}% (> 5%)",
        relative_gap * 100.0,
    );

    // The two estimates differ by exactly one vocab × hidden matrix.
    let vocab_matrix = Cfg::VOCAB_SIZE * Cfg::HIDDEN_DIM;
    assert_eq!(p - stored, vocab_matrix, "tying accounting mismatch");
}
/// Calling `validate()` at runtime must return without panicking.
#[test]
fn validate_is_a_noop_at_runtime() {
    let run_checks: fn() = Llama370MConfig::validate;
    run_checks();
}
/// Each shape marker, instantiated from the config constants, must expose the
/// same value through `VALUE` and occupy zero bytes.
#[test]
fn shape_newtypes_compile_and_roundtrip() {
    type Hidden = HiddenDim<{ Llama370MConfig::HIDDEN_DIM }>;
    type Heads = NumHeads<{ Llama370MConfig::NUM_HEADS }>;
    type KvHeads = NumKvHeads<{ Llama370MConfig::NUM_KV_HEADS }>;
    type Head = HeadDim<{ Llama370MConfig::HEAD_DIM }>;
    type Inter = IntermediateDim<{ Llama370MConfig::INTERMEDIATE_DIM }>;
    type Layers = NumLayers<{ Llama370MConfig::NUM_LAYERS }>;
    type Vocab = VocabSize<{ Llama370MConfig::VOCAB_SIZE }>;

    let observed_vs_expected: [(usize, usize); 7] = [
        (Hidden::VALUE, 1024),
        (Heads::VALUE, 16),
        (KvHeads::VALUE, 4),
        (Head::VALUE, 64),
        (Inter::VALUE, 2816),
        (Layers::VALUE, 24),
        (Vocab::VALUE, 50_257),
    ];
    for (observed, expected) in observed_vs_expected {
        assert_eq!(observed, expected);
    }

    assert_eq!(std::mem::size_of::<Hidden>(), 0);
    assert_eq!(std::mem::size_of::<Heads>(), 0);
}
// Text of the sovereign 370M contract YAML, embedded at compile time via
// `include_str!`; the path is resolved relative to this source file.
const SOVEREIGN_CONTRACT_YAML: &str =
include_str!("../../../../contracts/model-families/llama-370m-sovereign-v1.yaml");
/// Cross-checks the embedded YAML contract against the `Llama370MConfig`
/// constants: identity fields, every architecture dimension, `rope_theta`,
/// and the structural constraints.
#[test]
fn falsify_ship_011_rust_scaffold_matches_yaml_contract() {
    type Cfg = Llama370MConfig;

    let doc: serde_yaml::Value = serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML)
        .expect("llama-370m-sovereign-v1.yaml must parse as YAML");

    // Identity: make sure the right document was embedded.
    assert_eq!(
        doc["contract_id"].as_str(),
        Some("C-LLAMA-370M-SOVEREIGN"),
        "wrong contract loaded — check include_str! path",
    );
    assert_eq!(doc["family"].as_str(), Some("llama"));
    assert_eq!(doc["size_variant"].as_str(), Some("370m"));

    // Architecture dimensions. The conversion is checked rather than `as`, so
    // a YAML value that does not fit in `usize` becomes `None` (and fails the
    // comparison) instead of being truncated into a possibly matching number.
    let arch = &doc["architecture"];
    let arch_dim = |key: &str| -> Option<usize> {
        arch[key]
            .as_u64()
            .and_then(|raw| <usize as std::convert::TryFrom<u64>>::try_from(raw).ok())
    };
    let expected_dims = [
        ("hidden_dim", Cfg::HIDDEN_DIM),
        ("num_layers", Cfg::NUM_LAYERS),
        ("num_heads", Cfg::NUM_HEADS),
        ("num_kv_heads", Cfg::NUM_KV_HEADS),
        ("head_dim", Cfg::HEAD_DIM),
        ("intermediate_dim", Cfg::INTERMEDIATE_DIM),
        ("vocab_size", Cfg::VOCAB_SIZE),
        ("max_position_embeddings", Cfg::MAX_POSITION_EMBEDDINGS),
    ];
    for (key, expected) in expected_dims {
        assert_eq!(
            arch_dim(key),
            Some(expected),
            "YAML architecture.{key} drifted from Rust const",
        );
    }

    let rope_theta = arch["rope_theta"].as_f64().expect("rope_theta must be a float");
    assert!(
        (rope_theta - f64::from(Cfg::ROPE_THETA)).abs() < 1e-6,
        "YAML rope_theta {rope_theta} != Rust const {}",
        Cfg::ROPE_THETA,
    );

    // Structural constraints.
    let constraints = &doc["constraints"];
    assert_eq!(constraints["tied_embeddings"].as_bool(), Some(Cfg::TIED_EMBEDDINGS));
    assert_eq!(constraints["has_bias"].as_bool(), Some(Cfg::HAS_BIAS));
    assert_eq!(constraints["attention_type"].as_str(), Some("gqa"));
    assert_eq!(constraints["activation"].as_str(), Some("silu"));
    assert_eq!(constraints["norm_type"].as_str(), Some("rmsnorm"));
    assert_eq!(constraints["positional_encoding"].as_str(), Some("rope"));
    assert_eq!(constraints["mlp_type"].as_str(), Some("swiglu"));
}
/// The embedded contract must carry `status: ACTIVE`.
#[test]
fn falsify_ship_011_sovereign_contract_is_active() {
    let contract = serde_yaml::from_str::<serde_yaml::Value>(SOVEREIGN_CONTRACT_YAML)
        .expect("parse sovereign contract");
    let status = contract["status"].as_str();
    assert_eq!(
        status,
        Some("ACTIVE"),
        "C-LLAMA-370M-SOVEREIGN must be ACTIVE once FALSIFY-SHIP-011 \
         discharges — PROPOSED contracts cannot gate a ship",
    );
}
/// Lists `(tensor name, shape)` pairs derived from `Llama370MConfig`: the
/// embedding and `lm_head` matrices, nine tensors per layer, and the final
/// norm — `3 + 9 * NUM_LAYERS` entries in total.
fn enumerate_370m_apr_tensors() -> Vec<(String, Vec<usize>)> {
    let h = Llama370MConfig::HIDDEN_DIM;
    let v = Llama370MConfig::VOCAB_SIZE;
    let i = Llama370MConfig::INTERMEDIATE_DIM;
    let layers = Llama370MConfig::NUM_LAYERS;
    // Projection widths: q/o use all heads, k/v only the kv heads.
    let q_rows = Llama370MConfig::NUM_HEADS * Llama370MConfig::HEAD_DIM;
    let kv_rows = Llama370MConfig::NUM_KV_HEADS * Llama370MConfig::HEAD_DIM;

    let mut out: Vec<(String, Vec<usize>)> = Vec::with_capacity(3 + 9 * layers);
    out.extend([
        ("model.embed_tokens.weight".into(), vec![v, h]),
        ("lm_head.weight".into(), vec![v, h]),
    ]);
    for n in 0..layers {
        out.extend([
            (format!("model.layers.{n}.self_attn.q_proj.weight"), vec![q_rows, h]),
            (format!("model.layers.{n}.self_attn.k_proj.weight"), vec![kv_rows, h]),
            (format!("model.layers.{n}.self_attn.v_proj.weight"), vec![kv_rows, h]),
            (format!("model.layers.{n}.self_attn.o_proj.weight"), vec![h, q_rows]),
            (format!("model.layers.{n}.mlp.gate_proj.weight"), vec![i, h]),
            (format!("model.layers.{n}.mlp.up_proj.weight"), vec![i, h]),
            (format!("model.layers.{n}.mlp.down_proj.weight"), vec![h, i]),
            (format!("model.layers.{n}.input_layernorm.weight"), vec![h]),
            (format!("model.layers.{n}.post_attention_layernorm.weight"), vec![h]),
        ]);
    }
    out.push(("model.norm.weight".into(), vec![h]));
    out
}
/// Every enumerated 370M tensor must resolve in the layout contract, a few
/// key tensors must have the expected shapes, and a reversed `lm_head` shape
/// must be rejected by `validate_apr_shape`.
#[test]
fn falsify_ship_019_layout_contract_covers_every_370m_tensor() {
    use aprender::format::layout_contract::LayoutContract;
    type Cfg = Llama370MConfig;

    let contract = LayoutContract::new();
    let tensors = enumerate_370m_apr_tensors();

    let expected_len = 3 + 9 * Cfg::NUM_LAYERS;
    assert_eq!(
        tensors.len(),
        expected_len,
        "370M enumerator produced wrong tensor count — scaffold drift",
    );

    // Coverage: each tensor name must match a contract entry.
    for (name, _shape) in tensors.iter() {
        assert!(
            contract.get_apr_contract(name).is_some(),
            "370M tensor `{name}` has no layout_contract entry — \
             LAYOUT-001 coverage gap (every tensor in this model must \
             pattern-match a TensorContract or GGUF export layout will \
             silently skip it)",
        );
    }

    // Shape pins for the tensors most prone to transposition mistakes.
    let vocab_by_hidden = vec![Cfg::VOCAB_SIZE, Cfg::HIDDEN_DIM];

    let (_, lm_shape) = tensors
        .iter()
        .find(|(tensor, _)| tensor.as_str() == "lm_head.weight")
        .expect("lm_head must be enumerated");
    assert_eq!(
        *lm_shape,
        vocab_by_hidden,
        "lm_head.weight must be row-major [vocab, hidden] — GH-202 \
         root cause; reversed `[hidden, vocab]` produces [PAD] garbage",
    );

    let (_, embed_shape) = tensors
        .iter()
        .find(|(tensor, _)| tensor.as_str() == "model.embed_tokens.weight")
        .expect("embed_tokens must be enumerated");
    assert_eq!(
        *embed_shape,
        vocab_by_hidden,
        "embed_tokens.weight must be row-major [vocab, hidden]",
    );

    let (_, k0_shape) = tensors
        .iter()
        .find(|(tensor, _)| tensor.as_str() == "model.layers.0.self_attn.k_proj.weight")
        .expect("k_proj layer 0 must be enumerated");
    assert_eq!(
        *k0_shape,
        vec![Cfg::NUM_KV_HEADS * Cfg::HEAD_DIM, Cfg::HIDDEN_DIM],
        "k_proj must be row-major [kv_heads*head_dim, hidden] — GQA",
    );

    let (_, q0_shape) = tensors
        .iter()
        .find(|(tensor, _)| tensor.as_str() == "model.layers.0.self_attn.q_proj.weight")
        .expect("q_proj layer 0 must be enumerated");
    assert_eq!(
        *q0_shape,
        vec![Cfg::NUM_HEADS * Cfg::HEAD_DIM, Cfg::HIDDEN_DIM],
        "q_proj must be row-major [heads*head_dim, hidden]",
    );

    // The contract's own validator: correct orientation passes, reversed fails.
    contract
        .validate_apr_shape(
            "lm_head.weight",
            &[Cfg::VOCAB_SIZE, Cfg::HIDDEN_DIM],
            Cfg::VOCAB_SIZE,
            Cfg::HIDDEN_DIM,
        )
        .expect("correct [vocab, hidden] lm_head must validate");

    let reversed = contract.validate_apr_shape(
        "lm_head.weight",
        &[Cfg::HIDDEN_DIM, Cfg::VOCAB_SIZE],
        Cfg::VOCAB_SIZE,
        Cfg::HIDDEN_DIM,
    );
    assert!(
        reversed.is_err(),
        "reversed [hidden, vocab] lm_head MUST be rejected by the \
         layout contract — this is GH-202 regression protection",
    );
}
/// GATE-ARCH-370M-004 in the embedded contract must carry its binding ids,
/// the partial-discharge status, non-empty evidence, a documented blocker and
/// `ship_blocking: true`.
#[test]
fn falsify_ship_019_gate_arch_370m_004_has_partial_discharge_marker() {
    let contract: serde_yaml::Value =
        serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML).expect("parse sovereign contract");
    let gate = contract["gates"]
        .as_sequence()
        .expect("gates must be a sequence in sovereign contract")
        .iter()
        .find(|entry| entry["id"].as_str() == Some("GATE-ARCH-370M-004"))
        .expect("GATE-ARCH-370M-004 must exist in sovereign contract");

    // String-valued fields: (key, expected value, failure message).
    let pinned_fields = [
        (
            "falsification_id",
            "FALSIFY-SHIP-019",
            "GATE-ARCH-370M-004 must bind FALSIFY-SHIP-019",
        ),
        ("binds_to", "AC-SHIP2-009", "GATE-ARCH-370M-004 must bind AC-SHIP2-009"),
        (
            "discharge_status",
            "PARTIAL_ALGORITHM_LEVEL",
            "GATE-ARCH-370M-004 must advertise PARTIAL_ALGORITHM_LEVEL \
             (full discharge blocks on real trained 370M .apr)",
        ),
    ];
    for (field, expected, why) in pinned_fields {
        assert_eq!(gate[field].as_str(), Some(expected), "{why}");
    }

    let evidence = gate["evidence_discharged_by"]
        .as_sequence()
        .expect("GATE-ARCH-370M-004 must have evidence_discharged_by");
    assert!(
        !evidence.is_empty(),
        "GATE-ARCH-370M-004 evidence_discharged_by must list \
         at least one test function or artifact",
    );

    let blocker = gate["full_discharge_blocks_on"].as_str();
    assert!(blocker.is_some(), "PARTIAL gate must document full_discharge_blocks_on");

    assert_eq!(
        gate["ship_blocking"].as_bool(),
        Some(true),
        "GATE-ARCH-370M-004 must advertise ship_blocking:true — the \
         gate's `verdict:pass` alone is insufficient green while \
         discharge_status == PARTIAL_ALGORITHM_LEVEL",
    );
}
/// Exercises `verdict_from_syntax_error_count`: pinned boundary values, a
/// monotonicity sweep over every possible error count, and the two constants.
#[test]
fn falsify_ship_017_syntax_error_count_threshold_logic() {
    type V = Ship017Verdict;
    const PROMPTS: usize = AC_SHIP2_007_HELDOUT_PROMPT_COUNT;

    // Pinned cases: (error count, expected verdict, failure message).
    let pinned: [(usize, V, &str); 5] = [
        (0, V::Pass, "0 syntax errors must always Pass"),
        (
            1,
            V::Pass,
            "1 syntax error is the AC_SHIP2_007_MAX_TOLERATED_SYNTAX_ERRORS \
             boundary and must Pass",
        ),
        (
            2,
            V::Fail,
            "2 syntax errors is the FALSIFY-SHIP-017 ship-blocker \
             boundary and must Fail",
        ),
        (PROMPTS, V::Fail, "all-errors must Fail — trivial sanity"),
        (PROMPTS / 2, V::Fail, "50% errors on 100 prompts must Fail"),
    ];
    for (count, expected, why) in pinned {
        assert_eq!(verdict_from_syntax_error_count(count), expected, "{why}");
    }

    // Monotonicity: after the first Fail, every larger count must also Fail.
    let mut last_was_fail = false;
    for errors in 0..=PROMPTS {
        let verdict = verdict_from_syntax_error_count(errors);
        if last_was_fail {
            assert_eq!(
                verdict,
                V::Fail,
                "monotonicity violation at errors={errors}: once Fail, \
                 more errors cannot return to Pass",
            );
        }
        last_was_fail |= verdict == V::Fail;
    }

    // Constant pins.
    assert_eq!(
        PROMPTS, 100,
        "AC-SHIP2-007 spec §6 pins the harness at 100 held-out prompts",
    );
    assert_eq!(
        AC_SHIP2_007_MAX_TOLERATED_SYNTAX_ERRORS, 1,
        "FALSIFY-SHIP-017 (spec §8.3 row) tolerates ≤ 1 SyntaxError",
    );
}
/// GATE-ARCH-370M-005 in the embedded contract must carry its binding ids,
/// the partial-discharge status, non-empty evidence, a documented blocker and
/// `ship_blocking: true`.
#[test]
fn falsify_ship_017_gate_arch_370m_005_has_partial_discharge_marker() {
    let contract: serde_yaml::Value =
        serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML).expect("parse sovereign contract");
    let gate = contract["gates"]
        .as_sequence()
        .expect("gates must be a sequence in sovereign contract")
        .iter()
        .find(|entry| entry["id"].as_str() == Some("GATE-ARCH-370M-005"))
        .expect("GATE-ARCH-370M-005 must exist in sovereign contract");

    // String-valued fields: (key, expected value, failure message).
    let pinned_fields = [
        (
            "falsification_id",
            "FALSIFY-SHIP-017",
            "GATE-ARCH-370M-005 must bind FALSIFY-SHIP-017",
        ),
        ("binds_to", "AC-SHIP2-007", "GATE-ARCH-370M-005 must bind AC-SHIP2-007"),
        (
            "discharge_status",
            "PARTIAL_ALGORITHM_LEVEL",
            "GATE-ARCH-370M-005 must advertise PARTIAL_ALGORITHM_LEVEL \
             (full discharge blocks on real trained 370M .apr + 100-prompt \
             `apr run` harness)",
        ),
    ];
    for (field, expected, why) in pinned_fields {
        assert_eq!(gate[field].as_str(), Some(expected), "{why}");
    }

    let evidence = gate["evidence_discharged_by"]
        .as_sequence()
        .expect("GATE-ARCH-370M-005 must have evidence_discharged_by");
    assert!(
        !evidence.is_empty(),
        "GATE-ARCH-370M-005 evidence_discharged_by must list \
         at least one test function or artifact",
    );

    let blocker = gate["full_discharge_blocks_on"].as_str();
    assert!(blocker.is_some(), "PARTIAL gate must document full_discharge_blocks_on");

    assert_eq!(
        gate["ship_blocking"].as_bool(),
        Some(true),
        "GATE-ARCH-370M-005 must advertise ship_blocking:true — the \
         gate's `verdict:pass` alone is insufficient green while \
         discharge_status == PARTIAL_ALGORITHM_LEVEL",
    );
}
/// Exercises `verdict_from_decode_tps`: the inclusive floor, one ULP below
/// it, monotonicity in both sweep directions, non-finite inputs, and the
/// constant pin.
#[test]
fn falsify_ship_020_decode_tps_threshold_logic() {
    // Inclusive floor and its immediate neighbour.
    assert_eq!(
        verdict_from_decode_tps(100.0),
        Ship020Verdict::Pass,
        "exactly 100.0 tok/s must Pass (contract floor)",
    );
    let just_below = f32::from_bits(100.0_f32.to_bits() - 1);
    assert!(just_below < 100.0);
    assert_eq!(
        verdict_from_decode_tps(just_below),
        Ship020Verdict::Fail,
        "one ULP below 100.0 tok/s must Fail",
    );

    // Clear-cut values on both sides.
    assert_eq!(verdict_from_decode_tps(120.0), Ship020Verdict::Pass);
    assert_eq!(verdict_from_decode_tps(500.0), Ship020Verdict::Pass);
    assert_eq!(verdict_from_decode_tps(0.0), Ship020Verdict::Fail);
    assert_eq!(verdict_from_decode_tps(50.0), Ship020Verdict::Fail);

    // Ascending sweep: once a Pass has been seen, no higher tps may Fail.
    // (The earlier version of this loop only toggled a flag and asserted
    // nothing, so it could never detect a violation.)
    let samples = [0.0_f32, 25.0, 50.0, 75.0, 99.0, 99.5, 99.99, 100.0, 150.0, 10_000.0];
    let mut seen_pass = false;
    for &t in &samples {
        match verdict_from_decode_tps(t) {
            Ship020Verdict::Pass => seen_pass = true,
            Ship020Verdict::Fail => assert!(
                !seen_pass,
                "monotonicity violated: tps={t} produced Fail after a \
                 lower-tps Pass was already observed",
            ),
        }
    }
    assert!(seen_pass, "ascending sweep must reach Pass at or above the floor");

    // Descending sweep: once a Fail has been seen, no lower tps may Pass.
    let decreasing = [10_000.0_f32, 500.0, 150.0, 100.0, 99.99, 99.0, 50.0, 25.0, 0.0];
    let mut locked_fail = false;
    for &t in &decreasing {
        let v = verdict_from_decode_tps(t);
        if v == Ship020Verdict::Fail {
            locked_fail = true;
        } else {
            assert!(
                !locked_fail,
                "monotonicity violated: tps={t} produced Pass after a \
                 lower-tps Fail was already observed",
            );
        }
    }

    // Non-finite inputs fail closed.
    assert_eq!(
        verdict_from_decode_tps(f32::NAN),
        Ship020Verdict::Fail,
        "NaN tps has no well-defined median and must Fail",
    );
    assert_eq!(verdict_from_decode_tps(f32::NEG_INFINITY), Ship020Verdict::Fail,);
    assert_eq!(
        verdict_from_decode_tps(f32::INFINITY),
        Ship020Verdict::Fail,
        "+∞ tok/s is ill-formed — a real `apr bench` median is \
         always a finite positive; treating +∞ as Pass would let \
         an instrumentation bug silently green the ship-gate",
    );

    // Constant pin.
    assert!(
        (AC_SHIP2_010_MIN_DECODE_TPS_RTX4090 - 100.0_f32).abs() < f32::EPSILON,
        "AC_SHIP2_010_MIN_DECODE_TPS_RTX4090 must stay pinned to 100.0 \
         tok/s — see contracts/model-families/llama-370m-sovereign-v1.yaml \
         GATE-ARCH-370M-006",
    );
}
/// GATE-ARCH-370M-006 in the embedded contract must carry its binding ids,
/// the partial-discharge status, non-empty evidence, a documented blocker and
/// `ship_blocking: true`.
#[test]
fn falsify_ship_020_gate_arch_370m_006_has_partial_discharge_marker() {
    let contract: serde_yaml::Value =
        serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML).expect("parse sovereign contract");
    let gate = contract["gates"]
        .as_sequence()
        .expect("gates must be a sequence in sovereign contract")
        .iter()
        .find(|entry| entry["id"].as_str() == Some("GATE-ARCH-370M-006"))
        .expect("GATE-ARCH-370M-006 must exist in sovereign contract");

    // String-valued fields: (key, expected value, failure message).
    let pinned_fields = [
        (
            "falsification_id",
            "FALSIFY-SHIP-020",
            "GATE-ARCH-370M-006 must bind FALSIFY-SHIP-020",
        ),
        ("binds_to", "AC-SHIP2-010", "GATE-ARCH-370M-006 must bind AC-SHIP2-010"),
        (
            "discharge_status",
            "PARTIAL_ALGORITHM_LEVEL",
            "GATE-ARCH-370M-006 must advertise PARTIAL_ALGORITHM_LEVEL \
             (full discharge blocks on real trained 370M .apr + RTX 4090 \
             `apr bench` median run)",
        ),
    ];
    for (field, expected, why) in pinned_fields {
        assert_eq!(gate[field].as_str(), Some(expected), "{why}");
    }

    let evidence = gate["evidence_discharged_by"]
        .as_sequence()
        .expect("GATE-ARCH-370M-006 must have evidence_discharged_by");
    assert!(
        !evidence.is_empty(),
        "GATE-ARCH-370M-006 evidence_discharged_by must list \
         at least one test function or artifact",
    );

    let blocker = gate["full_discharge_blocks_on"].as_str();
    assert!(blocker.is_some(), "PARTIAL gate must document full_discharge_blocks_on");

    assert_eq!(
        gate["ship_blocking"].as_bool(),
        Some(true),
        "GATE-ARCH-370M-006 must advertise ship_blocking:true — the \
         gate's `verdict:pass` alone is insufficient green while \
         discharge_status == PARTIAL_ALGORITHM_LEVEL",
    );
}
/// Exercises `verdict_from_pass_at_1`: values around the 30% floor, the
/// inclusive comparison at one-ULP resolution, monotonicity in `correct`,
/// malformed inputs, and the constant pin.
#[test]
fn falsify_ship_018_humaneval_pass_at_1_threshold_logic() {
    type V = Ship018Verdict;
    const FLOOR: f32 = AC_SHIP2_008_MIN_HUMANEVAL_PASS_AT_1_PCT;

    // Around the floor: (correct, total, expected verdict, failure message).
    let near_floor: [(usize, usize, V, &str); 5] = [
        (30, 100, V::Pass, "30/100 = 30.0% must pass the 30.0 floor"),
        (60, 200, V::Pass, "60/200 = 30.0% must pass the 30.0 floor"),
        (50, 164, V::Pass, "50/164 ≈ 30.49% must pass the 30.0 floor"),
        (49, 164, V::Fail, "49/164 ≈ 29.88% must fail the 30.0 floor"),
        (29, 100, V::Fail, "29/100 = 29.0% must fail the 30.0 floor"),
    ];
    for (correct, total, expected, why) in near_floor {
        assert_eq!(verdict_from_pass_at_1(correct, total, FLOOR), expected, "{why}");
    }

    // Inclusive comparison, probed with thresholds one ULP either side of 50.0.
    let exact_50 = 50.0_f32;
    assert_eq!(50.0_f32 * 2.0_f32, 100.0_f32, "sanity: 50.0 is exact in f32");
    let fifty_plus_ulp = f32::from_bits(exact_50.to_bits() + 1);
    let fifty_minus_ulp = f32::from_bits(exact_50.to_bits() - 1);
    assert!(fifty_plus_ulp > exact_50, "sanity: +ULP is strictly above");
    assert!(fifty_minus_ulp < exact_50, "sanity: −ULP is strictly below");
    let ulp_probes: [(f32, V, &str); 3] = [
        (
            exact_50,
            V::Pass,
            "inclusive floor: 50.0% ≥ 50.0 must Pass (proves `>=`, not `>`)",
        ),
        (
            fifty_plus_ulp,
            V::Fail,
            "50/100 must fail when threshold is one ULP above 50.0",
        ),
        (
            fifty_minus_ulp,
            V::Pass,
            "50/100 must pass when threshold is one ULP below 50.0",
        ),
    ];
    for (threshold, expected, why) in ulp_probes {
        assert_eq!(verdict_from_pass_at_1(50, 100, threshold), expected, "{why}");
    }

    // Clear-cut scores against the contract floor.
    let clear_cut: [(usize, usize, V, &str); 4] = [
        (82, 164, V::Pass, "82/164 = 50% must pass"),
        (164, 164, V::Pass, "perfect score must pass"),
        (0, 164, V::Fail, "zero-correct run must fail"),
        (1, 164, V::Fail, "1/164 ≈ 0.6% must fail"),
    ];
    for (correct, total, expected, why) in clear_cut {
        assert_eq!(verdict_from_pass_at_1(correct, total, FLOOR), expected, "{why}");
    }

    // Monotonicity: once Pass is reached, more correct answers never Fail.
    let total = 164usize;
    let mut already_passed = false;
    for correct in 0..=total {
        if verdict_from_pass_at_1(correct, total, FLOOR) == V::Pass {
            already_passed = true;
        } else {
            assert!(
                !already_passed,
                "monotonicity violated: correct={correct} reverted Pass→Fail",
            );
        }
    }

    // Malformed inputs fail closed: (correct, total, threshold, failure message).
    let malformed: [(usize, usize, f32, &str); 6] = [
        (
            0,
            0,
            FLOOR,
            "empty run (total=0) must fail — a positive floor is unsatisfiable",
        ),
        (
            0,
            0,
            0.0_f32,
            "empty run must fail even with a zero threshold — the harness \
             is broken if it reports an empty denominator",
        ),
        (
            165,
            164,
            FLOOR,
            "correct > total is a broken harness report; must fail closed",
        ),
        (164, 164, f32::NAN, "NaN threshold must fail"),
        (164, 164, f32::INFINITY, "+∞ threshold must fail"),
        (164, 164, f32::NEG_INFINITY, "−∞ threshold must fail"),
    ];
    for (correct, total, threshold, why) in malformed {
        assert_eq!(verdict_from_pass_at_1(correct, total, threshold), V::Fail, "{why}");
    }

    // Constant pin.
    assert!(
        (FLOOR - 30.0_f32).abs() < f32::EPSILON,
        "contract floor drift: AC_SHIP2_008_MIN_HUMANEVAL_PASS_AT_1_PCT \
         must stay pinned to 30.0 (spec §5.2 AC-SHIP2-008)",
    );
}
/// GATE-ARCH-370M-007 in the embedded contract must carry its binding ids,
/// the partial-discharge status, non-empty evidence, a documented blocker and
/// `ship_blocking: true`.
#[test]
fn falsify_ship_018_gate_arch_370m_007_has_partial_discharge_marker() {
    let contract: serde_yaml::Value =
        serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML).expect("parse sovereign contract");
    let gate = contract["gates"]
        .as_sequence()
        .expect("contract must have `gates:` sequence")
        .iter()
        .find(|entry| entry["id"].as_str() == Some("GATE-ARCH-370M-007"))
        .expect("GATE-ARCH-370M-007 (SHIP-018 humaneval pass@1) must be present");

    // String-valued fields: (key, expected value, failure message).
    let pinned_fields = [
        ("binds_to", "AC-SHIP2-008", "GATE-ARCH-370M-007 must bind AC-SHIP2-008"),
        (
            "falsification_id",
            "FALSIFY-SHIP-018",
            "GATE-ARCH-370M-007 must bind FALSIFY-SHIP-018",
        ),
        (
            "discharge_status",
            "PARTIAL_ALGORITHM_LEVEL",
            "GATE-ARCH-370M-007 must advertise PARTIAL_ALGORITHM_LEVEL — \
             full discharge blocks on trained 370M .apr + real apr eval",
        ),
    ];
    for (field, expected, why) in pinned_fields {
        assert_eq!(gate[field].as_str(), Some(expected), "{why}");
    }

    let evidence = gate["evidence_discharged_by"]
        .as_sequence()
        .expect("GATE-ARCH-370M-007 must have evidence_discharged_by");
    assert!(
        !evidence.is_empty(),
        "GATE-ARCH-370M-007 evidence_discharged_by must list at least \
         one test function or const pin",
    );

    let blocker = gate["full_discharge_blocks_on"].as_str();
    assert!(blocker.is_some(), "PARTIAL gate must document full_discharge_blocks_on");

    assert_eq!(
        gate["ship_blocking"].as_bool(),
        Some(true),
        "GATE-ARCH-370M-007 must advertise ship_blocking:true — \
         verdict:pass alone is insufficient green while \
         discharge_status == PARTIAL_ALGORITHM_LEVEL",
    );
}
/// FALSIFY-SHIP-016: `verdict_from_qa_gates` must act as a strict AND over
/// exactly `AC_SHIP2_006_REQUIRED_QA_GATE_COUNT` gate results.
///
/// Sections, in order: all-pass, each single-gate failure, all-fail, an
/// exhaustive sweep over every pass/fail combination, a monotonicity check
/// for false→true flips, wrong-length slices, and the pinned gate count.
#[test]
fn falsify_ship_016_apr_qa_aggregate_and_logic() {
    const GATE_COUNT: usize = AC_SHIP2_006_REQUIRED_QA_GATE_COUNT;

    // Bit `i` of `mask` becomes the pass/fail state of gate `i`.
    let decode = |mask: u32| -> [bool; GATE_COUNT] {
        std::array::from_fn(|bit| (mask >> bit) & 1 == 1)
    };

    assert_eq!(
        verdict_from_qa_gates(&[true; GATE_COUNT]),
        Ship016Verdict::Pass,
        "AC-SHIP2-006: all 8 gates PASS must yield Pass",
    );

    // Exactly one failing gate, at every position in turn.
    for flip_idx in 0..GATE_COUNT {
        let one_failing: [bool; GATE_COUNT] = std::array::from_fn(|i| i != flip_idx);
        assert_eq!(
            verdict_from_qa_gates(&one_failing),
            Ship016Verdict::Fail,
            "flipping gate index {flip_idx} from Pass to Fail must yield aggregate Fail \
             — SHIP-016 is an AND, not a majority or threshold",
        );
    }

    assert_eq!(
        verdict_from_qa_gates(&[false; GATE_COUNT]),
        Ship016Verdict::Fail,
        "all 8 gates FAIL must yield Fail",
    );

    // Exhaustive truth table: tally verdicts and cross-check each against the inputs.
    let mut passes = 0usize;
    let mut fails = 0usize;
    for mask in 0u32..(1u32 << GATE_COUNT) {
        let gates = decode(mask);
        let verdict = verdict_from_qa_gates(&gates);
        let every_gate_true = gates.iter().all(|&p| p);
        match verdict {
            Ship016Verdict::Pass => {
                passes += 1;
                assert!(
                    every_gate_true,
                    "Pass verdict must only occur when all 8 gates are true; \
                     got {gates:?} at mask {mask:#010b}",
                );
            }
            Ship016Verdict::Fail => {
                fails += 1;
                assert!(
                    !every_gate_true,
                    "Fail verdict must only occur when at least one gate is false; \
                     got {gates:?} at mask {mask:#010b}",
                );
            }
        }
    }
    assert_eq!(passes, 1, "exactly one of 256 combos (all-true) yields Pass");
    assert_eq!(fails, 255, "the other 255 combos must yield Fail");

    // Monotonicity: turning any false gate true must never regress Pass to Fail.
    for mask in 0u32..(1u32 << GATE_COUNT) {
        let before = decode(mask);
        for flip_idx in (0..GATE_COUNT).filter(|&i| !before[i]) {
            let after: [bool; GATE_COUNT] =
                std::array::from_fn(|i| i == flip_idx || before[i]);
            let before_v = verdict_from_qa_gates(&before);
            let after_v = verdict_from_qa_gates(&after);
            assert!(
                before_v != Ship016Verdict::Pass || after_v != Ship016Verdict::Fail,
                "monotonicity violated: flipping gate {flip_idx} from false to true \
                 regressed Pass→Fail at mask {mask:#010b}",
            );
        }
    }

    // Slices of the wrong length must fail even when every entry is true.
    assert_eq!(
        verdict_from_qa_gates(&[]),
        Ship016Verdict::Fail,
        "empty gate slice must Fail (contract drift)",
    );
    assert_eq!(
        verdict_from_qa_gates(&[true; 7]),
        Ship016Verdict::Fail,
        "7 gates (short by one) must Fail even when all true (contract drift)",
    );
    assert_eq!(
        verdict_from_qa_gates(&[true; 9]),
        Ship016Verdict::Fail,
        "9 gates (long by one) must Fail even when all true (contract drift)",
    );
    assert_eq!(
        verdict_from_qa_gates(&[true; 16]),
        Ship016Verdict::Fail,
        "double-wide gate slice must Fail (contract drift)",
    );

    // Pin the gate count itself.
    assert_eq!(
        AC_SHIP2_006_REQUIRED_QA_GATE_COUNT, 8,
        "AC-SHIP2-006 is the 8-gate aggregate; any change requires \
         contract + spec + CLI skip-flag edits in lockstep",
    );
}
/// FALSIFY-SHIP-016 contract-shape check for `GATE-ARCH-370M-008`.
///
/// Parses `SOVEREIGN_CONTRACT_YAML`, locates the gate by its `id`, and asserts
/// in order: the `falsification_id` and `binds_to` bindings, the
/// `PARTIAL_ALGORITHM_LEVEL` discharge status, a non-empty
/// `evidence_discharged_by` sequence, a string-valued
/// `full_discharge_blocks_on` entry, and `ship_blocking: true`.
#[test]
fn falsify_ship_016_gate_arch_370m_008_has_partial_discharge_marker() {
    let contract = serde_yaml::from_str::<serde_yaml::Value>(SOVEREIGN_CONTRACT_YAML)
        .expect("parse sovereign contract");

    // Walk the `gates` sequence and pick the entry whose `id` matches.
    let gate = contract["gates"]
        .as_sequence()
        .expect("gates must be a sequence in sovereign contract")
        .iter()
        .find(|entry| matches!(entry["id"].as_str(), Some("GATE-ARCH-370M-008")))
        .expect("GATE-ARCH-370M-008 must exist in sovereign contract");

    // Bindings to the falsification test id and the acceptance criterion.
    let bound_falsifier = gate["falsification_id"].as_str();
    assert_eq!(
        bound_falsifier,
        Some("FALSIFY-SHIP-016"),
        "GATE-ARCH-370M-008 must bind FALSIFY-SHIP-016",
    );
    let bound_criterion = gate["binds_to"].as_str();
    assert_eq!(
        bound_criterion,
        Some("AC-SHIP2-006"),
        "GATE-ARCH-370M-008 must bind AC-SHIP2-006",
    );

    // The gate must be marked as only partially discharged.
    let status = gate["discharge_status"].as_str();
    assert_eq!(
        status,
        Some("PARTIAL_ALGORITHM_LEVEL"),
        "GATE-ARCH-370M-008 must advertise PARTIAL_ALGORITHM_LEVEL \
         (full discharge blocks on real trained 370M .apr + `apr qa` harness)",
    );

    // A partial discharge has to name its evidence and its remaining blocker.
    let evidence_items = gate["evidence_discharged_by"]
        .as_sequence()
        .expect("GATE-ARCH-370M-008 must have evidence_discharged_by");
    assert!(
        !evidence_items.is_empty(),
        "GATE-ARCH-370M-008 evidence_discharged_by must list \
         at least one test function or artifact",
    );
    let blocker = gate["full_discharge_blocks_on"].as_str();
    assert!(
        blocker.is_some(),
        "PARTIAL gate must document full_discharge_blocks_on",
    );

    let blocks_ship = gate["ship_blocking"].as_bool();
    assert_eq!(
        blocks_ship,
        Some(true),
        "GATE-ARCH-370M-008 must advertise ship_blocking:true — the \
         gate's `verdict:pass` alone is insufficient green while \
         discharge_status == PARTIAL_ALGORITHM_LEVEL",
    );
}
/// FALSIFY-SHIP-013: threshold behaviour of `verdict_from_val_ce_loss` around
/// `AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS`.
///
/// Asserts that the ceiling itself and values one ULP below pass, one ULP
/// above fails, representative in-band and out-of-band values land on the
/// expected side, NaN / ±∞ / negative inputs fail, and the constant stays
/// pinned to `2.2`.
#[test]
fn falsify_ship_013_val_ce_loss_threshold_logic() {
    // The ceiling is inclusive, both via the constant and via the literal.
    assert_eq!(
        verdict_from_val_ce_loss(AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS),
        Ship013Verdict::Pass,
        "val CE == 2.2 must Pass (inclusive floor, not strict <)",
    );
    assert_eq!(
        verdict_from_val_ce_loss(2.2),
        Ship013Verdict::Pass,
        "literal 2.2 must Pass",
    );

    // Nearest representable neighbours of the ceiling, built from its bit pattern.
    let ceiling_bits = AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS.to_bits();
    let one_ulp_above = f32::from_bits(ceiling_bits + 1);
    let one_ulp_below = f32::from_bits(ceiling_bits - 1);
    assert!(one_ulp_above > AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS);
    assert!(one_ulp_below < AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS);
    assert_eq!(
        verdict_from_val_ce_loss(one_ulp_above),
        Ship013Verdict::Fail,
        "one ULP above 2.2 must Fail (strictly above ceiling)",
    );
    assert_eq!(
        verdict_from_val_ce_loss(one_ulp_below),
        Ship013Verdict::Pass,
        "one ULP below 2.2 must Pass (still under ceiling)",
    );

    // Representative values clearly inside each band.
    let pass_band = [0.0_f32, 0.5, 1.0, 2.0, 2.199];
    for ce in pass_band {
        assert_eq!(
            verdict_from_val_ce_loss(ce),
            Ship013Verdict::Pass,
            "val CE = {ce} must Pass (in clear Pass band)",
        );
    }
    let fail_band = [2.201_f32, 3.0, 10.0, f32::MAX];
    for ce in fail_band {
        assert_eq!(
            verdict_from_val_ce_loss(ce),
            Ship013Verdict::Fail,
            "val CE = {ce} must Fail (above 2.2 ceiling)",
        );
    }

    // Non-finite inputs are rejected.
    for (non_finite, why) in [
        (f32::NAN, "NaN val CE must Fail conservatively"),
        (f32::INFINITY, "+∞ val CE must Fail conservatively"),
        (f32::NEG_INFINITY, "-∞ val CE must Fail conservatively"),
    ] {
        assert_eq!(
            verdict_from_val_ce_loss(non_finite),
            Ship013Verdict::Fail,
            "{why}",
        );
    }

    // Negative inputs are rejected as well.
    let negatives = [-0.001_f32, -1.0, -f32::INFINITY];
    for neg_ce in negatives {
        assert_eq!(
            verdict_from_val_ce_loss(neg_ce),
            Ship013Verdict::Fail,
            "negative val CE = {neg_ce} must Fail (CE ≥ 0 by definition)",
        );
    }

    // Exact float comparison is intentional here: this pins the constant's value.
    #[allow(clippy::float_cmp)]
    {
        assert_eq!(
            AC_SHIP2_003_MAX_VAL_CROSS_ENTROPY_LOSS, 2.2_f32,
            "MODEL-2 val CE ceiling is 2.2 \
             (spec §5.2 AC-SHIP2-003; albor 370M Sovereign target)",
        );
    }
}
/// FALSIFY-SHIP-014: threshold behaviour of
/// `verdict_from_training_duration_days` around
/// `AC_SHIP2_004_MAX_TRAINING_DURATION_DAYS`.
///
/// Asserts that 21 days and fewer pass, 22 days and more fail, that a sweep
/// over days 0..=42 never returns to Pass after a Fail, and that the constant
/// stays pinned to 21.
#[test]
fn falsify_ship_014_training_duration_threshold_logic() {
    // Boundary cases right at the ceiling, checked in this order.
    let boundary_cases = [
        (
            AC_SHIP2_004_MAX_TRAINING_DURATION_DAYS,
            Ship014Verdict::Pass,
            "21 days must Pass (inclusive ceiling, not strict <)",
        ),
        (21, Ship014Verdict::Pass, "literal 21 days must Pass"),
        (
            20,
            Ship014Verdict::Pass,
            "20 days must Pass (one day under ceiling)",
        ),
        (
            22,
            Ship014Verdict::Fail,
            "22 days must Fail (one day over ceiling; Spec §9 Risk #4 escape hatch)",
        ),
    ];
    for (duration, expected, why) in boundary_cases {
        assert_eq!(
            verdict_from_training_duration_days(duration),
            expected,
            "{why}",
        );
    }

    // Representative values clearly inside each band.
    let pass_band = [0_u32, 1, 7, 14, 20, 21];
    for days in pass_band {
        assert_eq!(
            verdict_from_training_duration_days(days),
            Ship014Verdict::Pass,
            "{days} days must Pass (in clear Pass band)",
        );
    }
    let fail_band = [22_u32, 30, 100, u32::MAX];
    for days in fail_band {
        assert_eq!(
            verdict_from_training_duration_days(days),
            Ship014Verdict::Fail,
            "{days} days must Fail (above 21-day ceiling)",
        );
    }

    // Monotonic sweep: once a Fail has been seen, no later day may Pass.
    let mut seen_fail = false;
    for days in 0..=42_u32 {
        let v = verdict_from_training_duration_days(days);
        if v == Ship014Verdict::Fail {
            seen_fail = true;
        } else if seen_fail && v == Ship014Verdict::Pass {
            panic!(
                "monotonicity broken: day {days} flipped back to Pass \
                 after a previous Fail was observed",
            );
        }
    }

    // Re-check both sides of the boundary after the sweep.
    assert_eq!(
        verdict_from_training_duration_days(21),
        Ship014Verdict::Pass,
        "sweep boundary: day 21 must Pass",
    );
    assert_eq!(
        verdict_from_training_duration_days(22),
        Ship014Verdict::Fail,
        "sweep boundary: day 22 must Fail",
    );

    // Pin the ceiling constant itself.
    assert_eq!(
        AC_SHIP2_004_MAX_TRAINING_DURATION_DAYS, 21_u32,
        "MODEL-2 training-budget ceiling is 21 days \
         (spec §5.2 AC-SHIP2-004; RTX 4090 hardware budget)",
    );
}
}