#![allow(clippy::doc_overindented_list_items)]
#![allow(dead_code)]
use std::marker::PhantomData;
/// Generates a zero-sized marker type that carries one model dimension as a
/// `const` generic parameter, together with the shared `VALUE`/`new` API.
///
/// All seven shape newtypes below are structurally identical; generating them
/// from a single macro removes the copy-pasted boilerplate and guarantees the
/// types cannot drift apart (same derives, same API, same ZST layout).
macro_rules! shape_dim_newtype {
    ($($name:ident),+ $(,)?) => {
        $(
            #[derive(Debug, Clone, Copy, Default)]
            pub struct $name<const N: usize>(PhantomData<()>);

            impl<const N: usize> $name<N> {
                /// The compile-time dimension value carried by this marker type.
                pub const VALUE: usize = N;

                /// Constructs the (zero-sized) marker value.
                pub const fn new() -> Self {
                    Self(PhantomData)
                }
            }
        )+
    };
}

shape_dim_newtype!(
    HiddenDim,
    NumHeads,
    NumKvHeads,
    HeadDim,
    IntermediateDim,
    NumLayers,
    VocabSize,
);
/// Frozen architecture constants for the 370M-parameter Llama-style model.
///
/// These values are the Rust-side mirror of the sovereign YAML contract; the
/// test module below cross-checks each constant against that document so the
/// two cannot silently drift apart.
pub struct Llama370MConfig;

impl Llama370MConfig {
    /// Advertised ("nominal") parameter count and the accepted band around it.
    pub const PARAMETERS_NOMINAL: usize = 370_000_000;
    pub const PARAMETERS_MIN: usize = 366_000_000;
    pub const PARAMETERS_MAX: usize = 374_000_000;

    // Core transformer shape. (Was packed several-per-line; one const per
    // line for consistency with the rest of this impl.)
    pub const HIDDEN_DIM: usize = 1024;
    pub const NUM_LAYERS: usize = 24;
    pub const NUM_HEADS: usize = 16;
    /// GQA: 4 KV heads shared across the 16 query heads.
    pub const NUM_KV_HEADS: usize = 4;
    pub const HEAD_DIM: usize = 64;
    pub const INTERMEDIATE_DIM: usize = 2816;
    pub const VOCAB_SIZE: usize = 50_000;
    pub const MAX_POSITION_EMBEDDINGS: usize = 4096;

    // Positional-encoding / normalization hyperparameters.
    pub const ROPE_THETA: f32 = 10_000.0;
    pub const RMS_NORM_EPS: f32 = 1.0e-5;

    // Llama conventions: lm_head shares the embedding matrix; linear layers
    // carry no bias.
    pub const TIED_EMBEDDINGS: bool = true;
    pub const HAS_BIAS: bool = false;

    /// Checks the cross-constant invariants.
    ///
    /// Invoked in a `const` context (see the `const _` item below), so every
    /// violated assertion aborts the build instead of surfacing at runtime.
    ///
    /// # Panics
    /// Panics (at compile time when const-evaluated) if any INV-ARCH-370M-*
    /// invariant is violated.
    pub const fn validate() {
        assert!(
            Self::NUM_HEADS * Self::HEAD_DIM == Self::HIDDEN_DIM,
            "INV-ARCH-370M-002 violated: num_heads * head_dim != hidden_dim",
        );
        assert!(
            Self::NUM_KV_HEADS > 0 && Self::NUM_HEADS % Self::NUM_KV_HEADS == 0,
            "INV-ARCH-370M-003 violated: num_kv_heads does not divide num_heads",
        );
        assert!(
            Self::TIED_EMBEDDINGS,
            "INV-ARCH-370M-004 violated: tied_embeddings must be true for 370M",
        );
        // Exact f32 equality is intentional here: the contract pins the bit
        // pattern, not an approximation.
        assert!(
            Self::ROPE_THETA == 10_000.0_f32,
            "INV-ARCH-370M-005 violated: rope_theta must be exactly 10000.0",
        );
        assert!(
            Self::VOCAB_SIZE == 50_000,
            "INV-ARCH-370M-006 violated: vocab_size must equal 50_000",
        );
        assert!(
            !Self::HAS_BIAS,
            "INV-ARCH-370M-008 violated: has_bias must be false (Llama convention)",
        );
        assert!(
            Self::HIDDEN_DIM / Self::NUM_HEADS == Self::HEAD_DIM,
            "hidden_dim / num_heads != head_dim — config internally inconsistent",
        );
        assert!(
            Self::MAX_POSITION_EMBEDDINGS > 0 && Self::MAX_POSITION_EMBEDDINGS % 2 == 0,
            "max_position_embeddings must be a positive even integer for RoPE",
        );
    }
}
// Compile-time enforcement: `validate()` runs during const evaluation of this
// anonymous item, so any violated INV-ARCH-370M-* invariant fails the build
// rather than surfacing at runtime.
#[allow(clippy::let_unit_value)]
const _: () = Llama370MConfig::validate();
/// Nominal parameter count for the 370M model.
///
/// Because the lm_head weight is tied to `embed_tokens`, the nominal count is
/// the stored count plus one extra `[vocab, hidden]` matrix counted logically
/// for the output projection.
#[must_use]
pub const fn estimated_param_count() -> usize {
    let tied_lm_head = Llama370MConfig::VOCAB_SIZE * Llama370MConfig::HIDDEN_DIM;
    tied_lm_head + estimated_stored_param_count()
}
/// Parameters actually stored on disk for the 370M model.
///
/// With tied embeddings the lm_head matrix is not stored separately, so the
/// total is: embedding + NUM_LAYERS * (attention + MLP + two norm gains) +
/// the final norm gain.
#[must_use]
pub const fn estimated_stored_param_count() -> usize {
    let hidden = Llama370MConfig::HIDDEN_DIM;
    let q_width = Llama370MConfig::NUM_HEADS * Llama370MConfig::HEAD_DIM;
    let kv_width = Llama370MConfig::NUM_KV_HEADS * Llama370MConfig::HEAD_DIM;
    let inter = Llama370MConfig::INTERMEDIATE_DIM;

    // q/k/v/o projection weights for one layer (GQA: k and v are narrower).
    let attention = hidden * q_width // q_proj
        + hidden * kv_width // k_proj
        + hidden * kv_width // v_proj
        + q_width * hidden; // o_proj

    // SwiGLU MLP: gate, up, and down projections, each hidden*intermediate
    // elements.
    let mlp = 3 * (hidden * inter);

    // input_layernorm + post_attention_layernorm gain vectors.
    let layer_norms = 2 * hidden;

    let per_layer = attention + mlp + layer_norms;
    let embedding = Llama370MConfig::VOCAB_SIZE * hidden;
    let final_norm = hidden;

    embedding + Llama370MConfig::NUM_LAYERS * per_layer + final_norm
}
#[cfg(test)]
mod tests {
use super::*;
// Pins every Rust-side constant to the contract values and re-checks the
// head-dim product and GQA divisibility relations with runtime assertions.
#[test]
fn config_matches_contract_values() {
assert_eq!(Llama370MConfig::HIDDEN_DIM, 1024);
assert_eq!(Llama370MConfig::NUM_LAYERS, 24);
assert_eq!(Llama370MConfig::NUM_HEADS, 16);
assert_eq!(Llama370MConfig::NUM_KV_HEADS, 4);
assert_eq!(Llama370MConfig::HEAD_DIM, 64);
assert_eq!(Llama370MConfig::INTERMEDIATE_DIM, 2816);
assert_eq!(Llama370MConfig::VOCAB_SIZE, 50_000);
assert_eq!(Llama370MConfig::MAX_POSITION_EMBEDDINGS, 4096);
assert!((Llama370MConfig::ROPE_THETA - 10_000.0_f32).abs() < 1e-6);
assert!((Llama370MConfig::RMS_NORM_EPS - 1.0e-5_f32).abs() < 1e-9);
assert!(Llama370MConfig::TIED_EMBEDDINGS);
assert!(!Llama370MConfig::HAS_BIAS);
assert_eq!(
Llama370MConfig::NUM_HEADS * Llama370MConfig::HEAD_DIM,
Llama370MConfig::HIDDEN_DIM,
);
assert_eq!(Llama370MConfig::NUM_HEADS % Llama370MConfig::NUM_KV_HEADS, 0);
}
// Checks the analytic parameter count against the PARAMETERS_MIN/MAX band,
// a 5% tolerance around the nominal 370M figure, and the tied-embedding
// accounting identity (nominal - stored == vocab * hidden).
#[test]
fn estimated_param_count_within_contract_band() {
let p = estimated_param_count();
let stored = estimated_stored_param_count();
eprintln!("albor-370m nominal param count = {p} ({} M)", p / 1_000_000,);
eprintln!(
"albor-370m stored param count = {stored} ({} M, lm_head tied)",
stored / 1_000_000,
);
assert!(
p >= Llama370MConfig::PARAMETERS_MIN,
"nominal param count {p} below INV-ARCH-370M-001 floor (366M)",
);
assert!(
p <= Llama370MConfig::PARAMETERS_MAX,
"nominal param count {p} above INV-ARCH-370M-001 ceiling (374M)",
);
let nominal = Llama370MConfig::PARAMETERS_NOMINAL as f64;
let pct = (p as f64 - nominal).abs() / nominal;
assert!(
pct < 0.05,
"nominal param count {p} differs from 370M by {:.2}% (> 5%)",
pct * 100.0,
);
assert_eq!(
p - stored,
Llama370MConfig::VOCAB_SIZE * Llama370MConfig::HIDDEN_DIM,
"tying accounting mismatch",
);
}
// `validate()` is normally discharged at compile time via `const _`; calling
// it here just confirms the runtime path neither panics nor has effects.
#[test]
fn validate_is_a_noop_at_runtime() {
Llama370MConfig::validate();
}
// Instantiates each const-generic shape newtype at the config's dimensions
// and confirms VALUE round-trips and the types stay zero-sized.
#[test]
fn shape_newtypes_compile_and_roundtrip() {
type Hidden = HiddenDim<{ Llama370MConfig::HIDDEN_DIM }>;
type Heads = NumHeads<{ Llama370MConfig::NUM_HEADS }>;
type KvHeads = NumKvHeads<{ Llama370MConfig::NUM_KV_HEADS }>;
type Head = HeadDim<{ Llama370MConfig::HEAD_DIM }>;
type Inter = IntermediateDim<{ Llama370MConfig::INTERMEDIATE_DIM }>;
type Layers = NumLayers<{ Llama370MConfig::NUM_LAYERS }>;
type Vocab = VocabSize<{ Llama370MConfig::VOCAB_SIZE }>;
assert_eq!(Hidden::VALUE, 1024);
assert_eq!(Heads::VALUE, 16);
assert_eq!(KvHeads::VALUE, 4);
assert_eq!(Head::VALUE, 64);
assert_eq!(Inter::VALUE, 2816);
assert_eq!(Layers::VALUE, 24);
assert_eq!(Vocab::VALUE, 50_000);
assert_eq!(std::mem::size_of::<Hidden>(), 0);
assert_eq!(std::mem::size_of::<Heads>(), 0);
}
// Sovereign contract YAML embedded at compile time; the relative path is
// resolved from this source file's directory by include_str!.
const SOVEREIGN_CONTRACT_YAML: &str =
include_str!("../../../../contracts/model-families/llama-370m-sovereign-v1.yaml");
// Cross-checks every architecture/constraint field of the YAML contract
// against the Rust constants so the two representations cannot drift.
#[test]
fn falsify_ship_011_rust_scaffold_matches_yaml_contract() {
let doc: serde_yaml::Value = serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML)
.expect("llama-370m-sovereign-v1.yaml must parse as YAML");
// Guard first that the expected document was embedded at all.
assert_eq!(
doc["contract_id"].as_str(),
Some("C-LLAMA-370M-SOVEREIGN"),
"wrong contract loaded — check include_str! path",
);
assert_eq!(doc["family"].as_str(), Some("llama"));
assert_eq!(doc["size_variant"].as_str(), Some("370m"));
let arch = &doc["architecture"];
assert_eq!(
arch["hidden_dim"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::HIDDEN_DIM),
"YAML architecture.hidden_dim drifted from Rust const",
);
assert_eq!(
arch["num_layers"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::NUM_LAYERS),
);
assert_eq!(
arch["num_heads"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::NUM_HEADS),
);
assert_eq!(
arch["num_kv_heads"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::NUM_KV_HEADS),
);
assert_eq!(arch["head_dim"].as_u64().map(|v| v as usize), Some(Llama370MConfig::HEAD_DIM),);
assert_eq!(
arch["intermediate_dim"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::INTERMEDIATE_DIM),
);
assert_eq!(
arch["vocab_size"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::VOCAB_SIZE),
);
assert_eq!(
arch["max_position_embeddings"].as_u64().map(|v| v as usize),
Some(Llama370MConfig::MAX_POSITION_EMBEDDINGS),
);
// Float field compared with a tolerance rather than exact equality.
let rope_theta = arch["rope_theta"].as_f64().expect("rope_theta must be a float");
assert!(
(rope_theta - f64::from(Llama370MConfig::ROPE_THETA)).abs() < 1e-6,
"YAML rope_theta {rope_theta} != Rust const {}",
Llama370MConfig::ROPE_THETA,
);
let constraints = &doc["constraints"];
assert_eq!(
constraints["tied_embeddings"].as_bool(),
Some(Llama370MConfig::TIED_EMBEDDINGS),
);
assert_eq!(constraints["has_bias"].as_bool(), Some(Llama370MConfig::HAS_BIAS),);
assert_eq!(constraints["attention_type"].as_str(), Some("gqa"));
assert_eq!(constraints["activation"].as_str(), Some("silu"));
assert_eq!(constraints["norm_type"].as_str(), Some("rmsnorm"));
assert_eq!(constraints["positional_encoding"].as_str(), Some("rope"));
assert_eq!(constraints["mlp_type"].as_str(), Some("swiglu"));
}
// Ship gate: the sovereign contract must carry status ACTIVE.
#[test]
fn falsify_ship_011_sovereign_contract_is_active() {
let doc: serde_yaml::Value =
serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML).expect("parse sovereign contract");
assert_eq!(
doc["status"].as_str(),
Some("ACTIVE"),
"C-LLAMA-370M-SOVEREIGN must be ACTIVE once FALSIFY-SHIP-011 \
discharges — PROPOSED contracts cannot gate a ship",
);
}
// Builds the full expected (tensor name, shape) list for a 370M checkpoint:
// embedding, (tied) lm_head, nine tensors per layer, plus the final norm.
// Shapes are row-major [out, in] except the 1-D norm gain vectors.
fn enumerate_370m_apr_tensors() -> Vec<(String, Vec<usize>)> {
let h = Llama370MConfig::HIDDEN_DIM;
let v = Llama370MConfig::VOCAB_SIZE;
let i = Llama370MConfig::INTERMEDIATE_DIM;
let nh = Llama370MConfig::NUM_HEADS;
let nkv = Llama370MConfig::NUM_KV_HEADS;
let hd = Llama370MConfig::HEAD_DIM;
let layers = Llama370MConfig::NUM_LAYERS;
let mut out: Vec<(String, Vec<usize>)> = Vec::with_capacity(3 + 9 * layers);
out.push(("model.embed_tokens.weight".into(), vec![v, h]));
out.push(("lm_head.weight".into(), vec![v, h]));
for n in 0..layers {
out.push((format!("model.layers.{n}.self_attn.q_proj.weight"), vec![nh * hd, h]));
out.push((format!("model.layers.{n}.self_attn.k_proj.weight"), vec![nkv * hd, h]));
out.push((format!("model.layers.{n}.self_attn.v_proj.weight"), vec![nkv * hd, h]));
out.push((format!("model.layers.{n}.self_attn.o_proj.weight"), vec![h, nh * hd]));
out.push((format!("model.layers.{n}.mlp.gate_proj.weight"), vec![i, h]));
out.push((format!("model.layers.{n}.mlp.up_proj.weight"), vec![i, h]));
out.push((format!("model.layers.{n}.mlp.down_proj.weight"), vec![h, i]));
out.push((format!("model.layers.{n}.input_layernorm.weight"), vec![h]));
out.push((format!("model.layers.{n}.post_attention_layernorm.weight"), vec![h]));
}
out.push(("model.norm.weight".into(), vec![h]));
out
}
// Verifies every enumerated 370M tensor matches a layout-contract pattern,
// checks the [vocab, hidden] / GQA orientations explicitly, and confirms the
// reversed lm_head shape is rejected (GH-202 regression protection).
#[test]
fn falsify_ship_019_layout_contract_covers_every_370m_tensor() {
use aprender::format::layout_contract::LayoutContract;
let contract = LayoutContract::new();
let tensors = enumerate_370m_apr_tensors();
assert_eq!(
tensors.len(),
3 + 9 * Llama370MConfig::NUM_LAYERS,
"370M enumerator produced wrong tensor count — scaffold drift",
);
// Coverage: every tensor name must resolve to some contract entry.
for (name, _) in &tensors {
assert!(
contract.get_apr_contract(name).is_some(),
"370M tensor `{name}` has no layout_contract entry — \
LAYOUT-001 coverage gap (every tensor in this model must \
pattern-match a TensorContract or GGUF export layout will \
silently skip it)",
);
}
let lm = tensors
.iter()
.find(|(n, _)| n == "lm_head.weight")
.expect("lm_head must be enumerated");
assert_eq!(
lm.1,
vec![Llama370MConfig::VOCAB_SIZE, Llama370MConfig::HIDDEN_DIM],
"lm_head.weight must be row-major [vocab, hidden] — GH-202 \
root cause; reversed `[hidden, vocab]` produces [PAD] garbage",
);
let embed = tensors
.iter()
.find(|(n, _)| n == "model.embed_tokens.weight")
.expect("embed_tokens must be enumerated");
assert_eq!(
embed.1,
vec![Llama370MConfig::VOCAB_SIZE, Llama370MConfig::HIDDEN_DIM],
"embed_tokens.weight must be row-major [vocab, hidden]",
);
let k0 = tensors
.iter()
.find(|(n, _)| n == "model.layers.0.self_attn.k_proj.weight")
.expect("k_proj layer 0 must be enumerated");
assert_eq!(
k0.1,
vec![
Llama370MConfig::NUM_KV_HEADS * Llama370MConfig::HEAD_DIM,
Llama370MConfig::HIDDEN_DIM,
],
"k_proj must be row-major [kv_heads*head_dim, hidden] — GQA",
);
let q0 = tensors
.iter()
.find(|(n, _)| n == "model.layers.0.self_attn.q_proj.weight")
.expect("q_proj layer 0 must be enumerated");
assert_eq!(
q0.1,
vec![
Llama370MConfig::NUM_HEADS * Llama370MConfig::HEAD_DIM,
Llama370MConfig::HIDDEN_DIM,
],
"q_proj must be row-major [heads*head_dim, hidden]",
);
// Positive case: the correct orientation validates...
contract
.validate_apr_shape(
"lm_head.weight",
&[Llama370MConfig::VOCAB_SIZE, Llama370MConfig::HIDDEN_DIM],
Llama370MConfig::VOCAB_SIZE,
Llama370MConfig::HIDDEN_DIM,
)
.expect("correct [vocab, hidden] lm_head must validate");
// ...negative case: the transposed orientation must be rejected.
let bad = contract.validate_apr_shape(
"lm_head.weight",
&[Llama370MConfig::HIDDEN_DIM, Llama370MConfig::VOCAB_SIZE],
Llama370MConfig::VOCAB_SIZE,
Llama370MConfig::HIDDEN_DIM,
);
assert!(
bad.is_err(),
"reversed [hidden, vocab] lm_head MUST be rejected by the \
layout contract — this is GH-202 regression protection",
);
}
// Verifies the GATE-ARCH-370M-004 entry in the sovereign contract carries
// the expected bindings, a PARTIAL_ALGORITHM_LEVEL discharge marker with
// evidence, and remains ship-blocking.
#[test]
fn falsify_ship_019_gate_arch_370m_004_has_partial_discharge_marker() {
let doc: serde_yaml::Value =
serde_yaml::from_str(SOVEREIGN_CONTRACT_YAML).expect("parse sovereign contract");
let gates =
doc["gates"].as_sequence().expect("gates must be a sequence in sovereign contract");
let gate = gates
.iter()
.find(|g| g["id"].as_str() == Some("GATE-ARCH-370M-004"))
.expect("GATE-ARCH-370M-004 must exist in sovereign contract");
assert_eq!(
gate["falsification_id"].as_str(),
Some("FALSIFY-SHIP-019"),
"GATE-ARCH-370M-004 must bind FALSIFY-SHIP-019",
);
assert_eq!(
gate["binds_to"].as_str(),
Some("AC-SHIP2-009"),
"GATE-ARCH-370M-004 must bind AC-SHIP2-009",
);
assert_eq!(
gate["discharge_status"].as_str(),
Some("PARTIAL_ALGORITHM_LEVEL"),
"GATE-ARCH-370M-004 must advertise PARTIAL_ALGORITHM_LEVEL \
(full discharge blocks on real trained 370M .apr)",
);
let evidence = gate["evidence_discharged_by"]
.as_sequence()
.expect("GATE-ARCH-370M-004 must have evidence_discharged_by");
assert!(
!evidence.is_empty(),
"GATE-ARCH-370M-004 evidence_discharged_by must list \
at least one test function or artifact",
);
assert!(
gate["full_discharge_blocks_on"].as_str().is_some(),
"PARTIAL gate must document full_discharge_blocks_on",
);
assert_eq!(
gate["ship_blocking"].as_bool(),
Some(true),
"GATE-ARCH-370M-004 must advertise ship_blocking:true — the \
gate's `verdict:pass` alone is insufficient green while \
discharge_status == PARTIAL_ALGORITHM_LEVEL",
);
}
}