/// Generate the contents of the build-time module: the `KNOWN_FAMILIES` list,
/// per-family and per-size constants, compile-time algebraic proofs, and the
/// `build_default_registry()` constructor.
fn generate_rust(families: &[FamilyData]) -> String {
let mut out = String::new();
out.push_str("// AUTO-GENERATED by build.rs (PMAT-250)\n");
out.push_str("// DO NOT EDIT — regenerated from contracts/model-families/*.yaml\n");
out.push_str("//\n");
out.push_str("// This file is included by src/format/model_family.rs via include!\n\n");
out.push_str("/// Known model family names (generated at build time from YAML contracts)\n");
out.push_str("pub const KNOWN_FAMILIES: &[&str] = &[\n");
for f in families {
out.push_str(&format!(" \"{}\",\n", f.family));
}
out.push_str("];\n\n");
for f in families {
let upper = f.family.to_uppercase();
out.push_str(&format!("/// {} family display name\n", f.display_name));
out.push_str(&format!(
"pub const {upper}_DISPLAY_NAME: &str = \"{}\";\n",
f.display_name
));
out.push_str(&format!(
"pub const {upper}_VENDOR: &str = \"{}\";\n",
f.vendor
));
for s in &f.sizes {
let size_upper = s.name.replace('.', "_").to_uppercase();
let prefix = format!("{upper}_{size_upper}");
out.push_str(&format!(
"pub const {prefix}_HIDDEN_DIM: usize = {};\n",
s.hidden_dim
));
out.push_str(&format!(
"pub const {prefix}_NUM_LAYERS: usize = {};\n",
s.num_layers
));
out.push_str(&format!(
"pub const {prefix}_NUM_HEADS: usize = {};\n",
s.num_heads
));
out.push_str(&format!(
"pub const {prefix}_NUM_KV_HEADS: usize = {};\n",
s.num_kv_heads
));
out.push_str(&format!(
"pub const {prefix}_INTERMEDIATE_DIM: usize = {};\n",
s.intermediate_dim
));
out.push_str(&format!(
"pub const {prefix}_VOCAB_SIZE: usize = {};\n",
s.vocab_size
));
out.push_str(&format!(
"pub const {prefix}_HEAD_DIM: usize = {};\n",
s.head_dim
));
out.push_str(&format!(
"pub const {prefix}_MAX_POSITION_EMBEDDINGS: usize = {};\n",
s.max_position_embeddings
));
}
out.push('\n');
out.push_str(&generate_algebraic_proofs(f));
}
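    // Emit `build_default_registry()`; every family is registered from values
    // baked into the generated source, so no YAML is read at runtime.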
out.push_str("/// Build a `FamilyRegistry` populated with all families from YAML contracts.\n");
out.push_str("///\n");
out.push_str("/// This function uses compiled-in data from build.rs — no runtime YAML\n");
out.push_str("/// parsing is needed. The data was validated at build time.\n");
out.push_str("#[must_use]\n");
out.push_str("pub fn build_default_registry() -> FamilyRegistry {\n");
out.push_str(" let mut registry = FamilyRegistry::new();\n\n");
for f in families {
out.push_str(&generate_family_registration(f));
}
out.push_str(" registry\n");
out.push_str("}\n");
out
}
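
/// Emit a `{ ... }` block that builds the `ModelFamilyConfig` for one family
/// and registers it with the `registry` local of the generated
/// `build_default_registry()`.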
fn generate_family_registration(f: &FamilyData) -> String {
let mut out = String::new();
out.push_str(" {\n");
out.push_str(" let mut size_variants = std::collections::HashMap::new();\n");
for s in &f.sizes {
out.push_str(&format!(
" size_variants.insert(\"{}\".to_string(), ModelSizeConfig {{\n",
s.name
));
out.push_str(&format!(
" parameters: \"{}\".to_string(),\n",
s.parameters
));
out.push_str(&format!(" hidden_dim: {},\n", s.hidden_dim));
out.push_str(&format!(" num_layers: {},\n", s.num_layers));
out.push_str(&format!(" num_heads: {},\n", s.num_heads));
out.push_str(&format!(" num_kv_heads: {},\n", s.num_kv_heads));
out.push_str(&format!(
" intermediate_dim: {},\n",
s.intermediate_dim
));
out.push_str(&format!(" vocab_size: {},\n", s.vocab_size));
out.push_str(&format!(
" max_position_embeddings: {},\n",
s.max_position_embeddings
));
out.push_str(&format!(" head_dim: {},\n", s.head_dim));
out.push_str(&format!(
" rope_theta: {}_f64,\n",
format_f64(s.rope_theta)
));
out.push_str(&format!(
" norm_eps: {}_f64,\n",
format_f64(s.norm_eps)
));
out.push_str(" });\n");
}
out.push_str(" let mut per_layer = std::collections::HashMap::new();\n");
for (role, pattern) in &f.per_layer_tensors {
out.push_str(&format!(
" per_layer.insert(\"{role}\".to_string(), Some(\"{pattern}\".to_string()));\n"
));
}
out.push_str(" let shapes = std::collections::HashMap::new();\n");
if f.chat_format.is_some() {
out.push_str(" // Chat template parsed at runtime if needed\n");
}
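    // GGUF per-layer entries: `Some(suffix)` maps a role to a GGUF tensor
    // suffix, while `None` marks a role deliberately absent from GGUF files.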
let has_gguf_entries = !f.gguf_per_layer.is_empty() || !f.gguf_skip_roles.is_empty();
if has_gguf_entries {
out.push_str(" let mut gguf_per_layer = std::collections::HashMap::new();\n");
for (role, suffix) in &f.gguf_per_layer {
out.push_str(&format!(
" gguf_per_layer.insert(\"{role}\".to_string(), Some(\"{suffix}\".to_string()));\n"
));
}
for role in &f.gguf_skip_roles {
out.push_str(&format!(
" gguf_per_layer.insert(\"{role}\".to_string(), None);\n"
));
}
} else {
out.push_str(" let gguf_per_layer = std::collections::HashMap::new();\n");
}
if f.gguf_fuse.is_empty() {
out.push_str(" let gguf_fuse = Vec::new();\n");
} else {
out.push_str(" let gguf_fuse = vec![\n");
for (gguf_suffix, sources) in &f.gguf_fuse {
let sources_str = sources
.iter()
.map(|s| format!("\"{s}\".to_string()"))
.collect::<Vec<_>>()
.join(", ");
out.push_str(&format!(
" GgufFusionRule {{ gguf_suffix: \"{gguf_suffix}\".to_string(), source_roles: vec![{sources_str}] }},\n"
));
}
out.push_str(" ];\n");
}
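    // Emit the `ModelFamilyConfig` literal in a single format! call. The
    // `\x20` escapes exist because a trailing `\` in a string literal strips
    // all leading whitespace from the next line; the escape keeps one leading
    // space so the generated code stays indented.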
out.push_str(&format!(
" let config = ModelFamilyConfig {{\n\
\x20 family: \"{}\".to_string(),\n\
\x20 display_name: \"{}\".to_string(),\n\
\x20 vendor: \"{}\".to_string(),\n\
\x20 architectures: vec![{}],\n\
\x20 hf_pattern: \"{}\".to_string(),\n\
\x20 size_variants,\n\
\x20 constraints: ModelConstraints {{\n\
\x20 attention_type: AttentionType::from_str_contract(\"{}\").unwrap_or(AttentionType::Mha),\n\
\x20 activation: Activation::from_str_contract(\"{}\").unwrap_or(Activation::Silu),\n\
\x20 norm_type: NormType::from_str_contract(\"{}\").unwrap_or(NormType::RmsNorm),\n\
\x20 has_bias: {},\n\
\x20 tied_embeddings: {},\n\
\x20 positional_encoding: PositionalEncoding::from_str_contract(\"{}\").unwrap_or(PositionalEncoding::Rope),\n\
\x20 mlp_type: MlpType::from_str_contract(\"{}\").unwrap_or(MlpType::SwiGlu),\n\
\x20 qk_norm: {},\n\
\x20 }},\n\
\x20 tensor_template: TensorTemplate {{\n\
\x20 embedding: \"{}\".to_string(),\n\
\x20 lm_head: {},\n\
\x20 final_norm: {},\n\
\x20 per_layer,\n\
\x20 }},\n\
\x20 gguf_tensor_template: GgufTensorTemplate {{\n\
\x20 embedding: {},\n\
\x20 position_embedding: {},\n\
\x20 lm_head: {},\n\
\x20 final_norm_weight: {},\n\
\x20 final_norm_bias: {},\n\
\x20 per_layer: gguf_per_layer,\n\
\x20 transpose_weights: {},\n\
\x20 fuse: gguf_fuse,\n\
\x20 }},\n\
\x20 shape_template: ShapeTemplate {{ shapes }},\n\
\x20 quantizations: vec![{}],\n\
\x20 chat_template: None,\n\
\x20 certification: None,\n\
\x20 }};\n",
f.family,
f.display_name,
f.vendor,
f.architectures
.iter()
.map(|a| format!("\"{a}\".to_string()"))
.collect::<Vec<_>>()
.join(", "),
f.hf_pattern,
f.constraints.attention,
f.constraints.activation,
f.constraints.norm,
f.constraints.bias,
f.constraints.tied,
f.constraints.position,
f.constraints.mlp,
f.constraints.qk_norm,
f.embedding_tensor,
f.lm_head_tensor
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.final_norm_tensor
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.gguf_embedding
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.gguf_position_embedding
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.gguf_lm_head
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.gguf_final_norm_weight
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.gguf_final_norm_bias
.as_ref()
.map_or("None".to_string(), |s| format!("Some(\"{s}\".to_string())")),
f.gguf_transpose_weights,
f.quantizations
.iter()
.map(|q| format!("\"{q}\".to_string()"))
.collect::<Vec<_>>()
.join(", "),
));
out.push_str(" registry.register(Box::new(DynModelFamily::new(config)));\n }\n\n");
out
}
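
/// Format an `f64` as a Rust literal that round-trips: integral values get an
/// explicit `.0` suffix so the emitted token parses as a float.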
fn format_f64(v: f64) -> String {
    if v.fract() == 0.0 {
        // Integral values (including 0.0) need an explicit trailing digit so
        // the emitted token parses as a float literal.
        format!("{v:.1}")
    } else {
        format!("{v}")
    }
}
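
/// Emit `const _: () = assert!(..)` proofs for one family's integer
/// invariants. Float fields (`rope_theta`, `norm_eps`) are validated directly
/// in the build script instead, since they are not emitted as named constants.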
fn generate_algebraic_proofs(f: &FamilyData) -> String {
let mut out = String::new();
let upper = f.family.to_uppercase();
out.push_str(&format!("// ── Algebraic proofs for {} ──\n", f.family));
for s in &f.sizes {
let size_upper = s.name.replace('.', "_").to_uppercase();
let prefix = format!("{upper}_{size_upper}");
out.push_str(&format!(
"const _: () = assert!({prefix}_HIDDEN_DIM > 0, \
\"non-degeneracy: {}/{} hidden_dim must be positive\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_NUM_LAYERS > 0, \
\"non-degeneracy: {}/{} num_layers must be positive\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_NUM_HEADS > 0, \
\"non-degeneracy: {}/{} num_heads must be positive\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_VOCAB_SIZE > 0, \
\"non-degeneracy: {}/{} vocab_size must be positive\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_NUM_KV_HEADS > 0, \
\"non-degeneracy: {}/{} num_kv_heads must be positive\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_NUM_KV_HEADS <= {prefix}_NUM_HEADS, \
\"GQA ordering: {}/{} num_kv_heads must be <= num_heads\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_HIDDEN_DIM % {prefix}_NUM_HEADS == 0, \
\"Vaswani (2017): {}/{} hidden_dim must be divisible by num_heads\");\n",
f.family, s.name
));
if s.num_kv_heads > 1 {
out.push_str(&format!(
"const _: () = assert!({prefix}_NUM_HEADS % {prefix}_NUM_KV_HEADS == 0, \
\"Ainslie (2023) GQA: {}/{} num_heads must be divisible by num_kv_heads\");\n",
f.family, s.name
));
}
out.push_str(&format!(
"const _: () = assert!({prefix}_HEAD_DIM >= {prefix}_HIDDEN_DIM / {prefix}_NUM_HEADS, \
\"head_dim underflow: {}/{} head_dim must be >= hidden_dim/num_heads\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_HEAD_DIM <= 2 * ({prefix}_HIDDEN_DIM / {prefix}_NUM_HEADS), \
\"head_dim overflow: {}/{} head_dim must be <= 2x hidden_dim/num_heads\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_INTERMEDIATE_DIM > {prefix}_HIDDEN_DIM, \
\"Shazeer (2020) FFN expansion: {}/{} intermediate_dim must exceed hidden_dim\");\n",
f.family, s.name
));
}
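    // The activation/MLP pairing is a cross-field check on contract strings,
    // so it runs here in the build script rather than as an emitted proof.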
let activation_mlp_valid = match (
f.constraints.mlp.as_str(),
f.constraints.activation.as_str(),
) {
("swiglu", "silu") => true,
("gelu_mlp", "gelu") => true,
("gated_mlp", "gelu") => true,
("gated_mlp", "silu") => true, (mlp, _) if mlp != "swiglu" && mlp != "gelu_mlp" && mlp != "gated_mlp" => true,
_ => false,
};
assert!(
activation_mlp_valid,
"PMAT-250: {} has inconsistent activation/MLP: activation={}, mlp={} \
         (Shazeer 2020: swiglu→silu, gelu_mlp→gelu, gated_mlp→gelu or silu)",
f.family, f.constraints.activation, f.constraints.mlp
);
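    // RoPE (Su et al., 2024): head_dim parity and positive max positions are
    // emitted as const proofs; theta is checked at build time because it is
    // only embedded in the registry literal, not exported as a constant.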
if f.constraints.position == "rope" {
for s in &f.sizes {
let size_upper = s.name.replace('.', "_").to_uppercase();
let prefix = format!("{upper}_{size_upper}");
out.push_str(&format!(
"const _: () = assert!({prefix}_HEAD_DIM % 2 == 0, \
\"Su (2024) RoPE: {}/{} head_dim must be even for cos/sin pairs\");\n",
f.family, s.name
));
out.push_str(&format!(
"const _: () = assert!({prefix}_MAX_POSITION_EMBEDDINGS > 0, \
\"Su (2024) RoPE: {}/{} max_position_embeddings must be positive\");\n",
f.family, s.name
));
assert!(
s.rope_theta > 0.0,
"PMAT-250: {}/{} has rope_theta={} but positional_encoding=rope \
(Su et al., 2024 requires theta > 0)",
f.family,
s.name,
s.rope_theta
);
assert!(
s.rope_theta.is_finite(),
"PMAT-250: {}/{} has non-finite rope_theta={}",
f.family,
s.name,
s.rope_theta
);
}
}
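    // norm_eps is likewise validated at build time: it must be positive,
    // finite, and less than 1.0 for RMSNorm to behave.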
for s in &f.sizes {
assert!(
s.norm_eps > 0.0,
"PMAT-250: {}/{} has norm_eps={} — must be positive \
(Zhang & Sennrich 2019: RMSNorm requires eps > 0 to prevent division by zero)",
f.family,
s.name,
s.norm_eps
);
assert!(
s.norm_eps < 1.0,
"PMAT-250: {}/{} has norm_eps={} — must be < 1.0 \
             (an eps of this size swamps the mean-square term and defeats RMSNorm normalization)",
f.family,
s.name,
s.norm_eps
);
assert!(
s.norm_eps.is_finite(),
"PMAT-250: {}/{} has non-finite norm_eps={}",
f.family,
s.name,
s.norm_eps
);
}
out.push('\n');
out
}