impl LayoutContract {
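    /// Build the canonical GGUF <-> APR tensor layout contract table.
    ///
    /// Each entry pins down, for one tensor family, the shape on each side,
    /// whether conversion must transpose, and the kernel contract the
    /// converted tensor must satisfy.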
#[must_use]
pub fn new() -> Self {
let contracts = vec![
TensorContract {
gguf_name: "token_embd.weight",
apr_name: "model.embed_tokens.weight",
gguf_shape_formula: "[hidden, vocab]",
apr_shape_formula: "[vocab, hidden]",
should_transpose: true,
kernel_signature: "lookup (row = token embedding, not matmul)",
kernel_out_dim: "vocab_size",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Embedding lookup table - row = token embedding",
},
TensorContract {
gguf_name: "output.weight",
apr_name: "lm_head.weight",
gguf_shape_formula: "[hidden, vocab]",
apr_shape_formula: "[vocab, hidden]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, vocab_size, hidden_dim)",
kernel_out_dim: "vocab_size",
kernel_in_dim: "hidden_dim",
is_critical: true,
notes: "GH-202 root cause - wrong shape caused [PAD] garbage output",
},
TensorContract {
gguf_name: "blk.{n}.attn_q.weight",
apr_name: "model.layers.{n}.self_attn.q_proj.weight",
gguf_shape_formula: "[hidden, heads*head_dim]",
apr_shape_formula: "[heads*head_dim, hidden]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, num_heads*head_dim, hidden_dim)",
kernel_out_dim: "num_heads * head_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Query projection in attention",
},
TensorContract {
gguf_name: "blk.{n}.attn_k.weight",
apr_name: "model.layers.{n}.self_attn.k_proj.weight",
gguf_shape_formula: "[hidden, kv_heads*head_dim]",
apr_shape_formula: "[kv_heads*head_dim, hidden]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, num_kv_heads*head_dim, hidden_dim)",
kernel_out_dim: "num_kv_heads * head_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Key projection in attention",
},
TensorContract {
gguf_name: "blk.{n}.attn_v.weight",
apr_name: "model.layers.{n}.self_attn.v_proj.weight",
gguf_shape_formula: "[hidden, kv_heads*head_dim]",
apr_shape_formula: "[kv_heads*head_dim, hidden]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, num_kv_heads*head_dim, hidden_dim)",
kernel_out_dim: "num_kv_heads * head_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Value projection in attention",
},
TensorContract {
gguf_name: "blk.{n}.attn_output.weight",
apr_name: "model.layers.{n}.self_attn.o_proj.weight",
gguf_shape_formula: "[heads*head_dim, hidden]",
apr_shape_formula: "[hidden, heads*head_dim]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, hidden_dim, num_heads*head_dim)",
kernel_out_dim: "hidden_dim",
kernel_in_dim: "num_heads * head_dim",
is_critical: false,
notes: "Output projection in attention",
},
TensorContract {
gguf_name: "blk.{n}.ffn_gate.weight",
apr_name: "model.layers.{n}.mlp.gate_proj.weight",
gguf_shape_formula: "[hidden, intermediate]",
apr_shape_formula: "[intermediate, hidden]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, intermediate_dim, hidden_dim)",
kernel_out_dim: "intermediate_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Gate projection in SwiGLU MLP",
},
TensorContract {
gguf_name: "blk.{n}.ffn_up.weight",
apr_name: "model.layers.{n}.mlp.up_proj.weight",
gguf_shape_formula: "[hidden, intermediate]",
apr_shape_formula: "[intermediate, hidden]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, intermediate_dim, hidden_dim)",
kernel_out_dim: "intermediate_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Up projection in SwiGLU MLP",
},
TensorContract {
gguf_name: "blk.{n}.ffn_down.weight",
apr_name: "model.layers.{n}.mlp.down_proj.weight",
gguf_shape_formula: "[intermediate, hidden]",
apr_shape_formula: "[hidden, intermediate]",
should_transpose: true,
kernel_signature: "matmul_q*k_rowmajor(W, x, hidden_dim, intermediate_dim)",
kernel_out_dim: "hidden_dim",
kernel_in_dim: "intermediate_dim",
is_critical: false,
notes: "Down projection in SwiGLU MLP",
},
TensorContract {
gguf_name: "blk.{n}.attn_norm.weight",
apr_name: "model.layers.{n}.input_layernorm.weight",
gguf_shape_formula: "[hidden]",
apr_shape_formula: "[hidden]",
should_transpose: false,
kernel_signature: "element-wise multiply",
kernel_out_dim: "hidden_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "RMSNorm before attention - 1D tensor, no transpose",
},
TensorContract {
gguf_name: "blk.{n}.ffn_norm.weight",
apr_name: "model.layers.{n}.post_attention_layernorm.weight",
gguf_shape_formula: "[hidden]",
apr_shape_formula: "[hidden]",
should_transpose: false,
kernel_signature: "element-wise multiply",
kernel_out_dim: "hidden_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "RMSNorm before MLP - 1D tensor, no transpose",
},
TensorContract {
gguf_name: "output_norm.weight",
apr_name: "model.norm.weight",
gguf_shape_formula: "[hidden]",
apr_shape_formula: "[hidden]",
should_transpose: false,
kernel_signature: "element-wise multiply",
kernel_out_dim: "hidden_dim",
kernel_in_dim: "hidden_dim",
is_critical: false,
notes: "Final RMSNorm - 1D tensor, no transpose",
},
];
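        // Index every contract under both of its names so a lookup works
        // from either the GGUF or the APR side of a conversion.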
let mut gguf_contracts = HashMap::new();
let mut apr_contracts = HashMap::new();
for contract in contracts {
gguf_contracts.insert(contract.gguf_name, contract.clone());
apr_contracts.insert(contract.apr_name, contract);
}
Self {
gguf_contracts,
apr_contracts,
}
}
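    /// Look up the contract for a GGUF tensor name.
    ///
    /// Tries an exact match first, then normalizes a concrete layer index to
    /// its pattern form, e.g. "blk.12.attn_q.weight" -> "blk.{n}.attn_q.weight".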
#[must_use]
pub fn get_gguf_contract(&self, name: &str) -> Option<&TensorContract> {
if let Some(contract) = self.gguf_contracts.get(name) {
return Some(contract);
}
let pattern = normalize_layer_pattern(name);
self.gguf_contracts.get(pattern.as_str())
}
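    /// Look up the contract for an APR tensor name, falling back to the
    /// "model.layers.{n}..." pattern when the name carries a concrete index.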
#[must_use]
pub fn get_apr_contract(&self, name: &str) -> Option<&TensorContract> {
if let Some(contract) = self.apr_contracts.get(name) {
return Some(contract);
}
let pattern = normalize_layer_pattern(name);
self.apr_contracts.get(pattern.as_str())
}
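    /// Whether this GGUF tensor must be transposed during conversion.
    /// Tensors without a contract default to `false` (no transpose).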
#[must_use]
pub fn should_transpose_gguf(&self, name: &str) -> bool {
        self.get_gguf_contract(name)
            .is_some_and(|c| c.should_transpose)
}
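    /// Whether the tensor (by GGUF or APR name) is marked critical, i.e. a
    /// layout mistake has been observed to corrupt output (see GH-202).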
#[must_use]
pub fn is_critical_tensor(&self, name: &str) -> bool {
        self.get_gguf_contract(name)
            .or_else(|| self.get_apr_contract(name))
            .is_some_and(|c| c.is_critical)
}
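    /// Validate a tensor's APR-side shape against the contract.
    ///
    /// Only critical tensors are currently enforced: the lm_head / output
    /// weight must be exactly `[vocab_size, hidden_dim]` (the GH-202
    /// regression). Tensors without a contract, and non-critical tensors,
    /// always pass.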
pub fn validate_apr_shape(
&self,
name: &str,
shape: &[usize],
vocab_size: usize,
hidden_dim: usize,
) -> Result<(), ContractError> {
        let Some(contract) = self.get_apr_contract(name) else {
            return Ok(());
        };
        if contract.is_critical
            && (name.contains("lm_head") || name.contains("output.weight"))
            && (shape.len() != 2 || shape[0] != vocab_size || shape[1] != hidden_dim)
        {
            return Err(ContractError::ShapeMismatch {
                tensor: name.to_string(),
                expected: format!("[{vocab_size}, {hidden_dim}]"),
                actual: shape.to_vec(),
            });
        }
Ok(())
}
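    /// Expected byte size of a Q4_K tensor:
    /// `out_dim * ceil(in_dim / block_sizes::QK_K) * block_sizes::Q4_K`.
    /// Each row is quantized in whole super-blocks; a partial trailing
    /// super-block is padded to a full one.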
#[must_use]
pub fn calculate_q4k_bytes(out_dim: usize, in_dim: usize) -> usize {
let superblocks = in_dim.div_ceil(block_sizes::QK_K);
out_dim * superblocks * block_sizes::Q4_K
}
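    /// Expected byte size of a Q6_K tensor; same super-block math as
    /// `calculate_q4k_bytes` but at `block_sizes::Q6_K` bytes per block.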
#[must_use]
pub fn calculate_q6k_bytes(out_dim: usize, in_dim: usize) -> usize {
let superblocks = in_dim.div_ceil(block_sizes::QK_K);
out_dim * superblocks * block_sizes::Q6_K
}
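    /// Check that a Q6_K tensor's actual byte length matches the size implied
    /// by its logical dimensions; a mismatch usually means the layout (or the
    /// out/in dimension order) was wrong at conversion time.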
pub fn validate_q6k_bytes(
&self,
name: &str,
actual_bytes: usize,
out_dim: usize,
in_dim: usize,
) -> Result<(), ContractError> {
let expected = Self::calculate_q6k_bytes(out_dim, in_dim);
if actual_bytes != expected {
return Err(ContractError::ByteSizeMismatch {
tensor: name.to_string(),
expected,
actual: actual_bytes,
});
}
Ok(())
}
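    /// All contracts flagged critical (currently just the lm_head / output
    /// projection).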
#[must_use]
pub fn critical_tensors(&self) -> Vec<&TensorContract> {
self.gguf_contracts
.values()
.filter(|c| c.is_critical)
.collect()
}
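    /// All 2D contracts whose tensors must be transposed during conversion.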
#[must_use]
pub fn transpose_tensors(&self) -> Vec<&TensorContract> {
self.gguf_contracts
.values()
.filter(|c| c.should_transpose)
.collect()
}
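    /// All contracts that pass through untransposed (the 1D norm weights).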
#[must_use]
pub fn non_transpose_tensors(&self) -> Vec<&TensorContract> {
self.gguf_contracts
.values()
.filter(|c| !c.should_transpose)
.collect()
}
}
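/// Replace a concrete layer index after "blk." or "layers." with the "{n}"
/// placeholder used as the contract key, e.g.
/// "model.layers.7.mlp.up_proj.weight" -> "model.layers.{n}.mlp.up_proj.weight".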
fn normalize_layer_pattern(name: &str) -> String {
    let mut result = name.to_string();
    for prefix in ["blk.", "layers."] {
        if let Some(start) = result.find(prefix) {
            let after = start + prefix.len();
            if let Some(dot_pos) = result[after..].find('.') {
                let num_end = after + dot_pos;
                if result[after..num_end]
                    .chars()
                    .all(|c| c.is_ascii_digit())
                {
                    result = format!("{}{{n}}{}", &result[..after], &result[num_end..]);
                }
            }
        }
    }
    result
}
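/// Convenience constructor for the default layout contract; equivalent to
/// `LayoutContract::new()`.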
#[must_use]
pub fn contract() -> LayoutContract {
LayoutContract::new()
}
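/// Identifiers for the falsifiable layout-contract rules, suitable for
/// tagging validation failures and test assertions.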
pub mod validation_rules {
pub const ALL_2D_TRANSPOSED: &str = "F-LAYOUT-CONTRACT-001";
pub const LM_HEAD_SHAPE: &str = "F-LAYOUT-CONTRACT-002";
pub const TENSORS_1D_UNCHANGED: &str = "F-LAYOUT-CONTRACT-003";
pub const BYTE_SIZE_MATCHES: &str = "F-LAYOUT-CONTRACT-004";
pub const NO_GARBAGE_OUTPUT: &str = "F-LAYOUT-CONTRACT-005";
}
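// Illustrative tests sketching how the contract table is consumed. The
// assertions below follow directly from the table in `LayoutContract::new`
// and the byte-size formulas above, not from any external fixture.
#[cfg(test)]
mod contract_sketch_tests {
    use super::*;

    #[test]
    fn layer_indexed_names_resolve_via_pattern() {
        let c = LayoutContract::new();
        // "blk.3.attn_q.weight" normalizes to "blk.{n}.attn_q.weight", so the
        // lookup succeeds and reports that Q/K/V projections are transposed.
        assert!(c.should_transpose_gguf("blk.3.attn_q.weight"));
        // 1D norm weights are never transposed.
        assert!(!c.should_transpose_gguf("blk.3.attn_norm.weight"));
    }

    #[test]
    fn lm_head_is_the_only_critical_tensor() {
        let c = LayoutContract::new();
        // Critical from either naming scheme.
        assert!(c.is_critical_tensor("output.weight"));
        assert!(c.is_critical_tensor("lm_head.weight"));
        assert_eq!(c.critical_tensors().len(), 1);
    }

    #[test]
    fn q4k_bytes_scale_with_whole_superblocks() {
        // One full super-block per row: bytes = out_dim * Q4_K.
        let bytes = LayoutContract::calculate_q4k_bytes(2, block_sizes::QK_K);
        assert_eq!(bytes, 2 * block_sizes::Q4_K);
        // A partial trailing super-block is padded up to a whole one.
        let padded = LayoutContract::calculate_q4k_bytes(2, block_sizes::QK_K + 1);
        assert_eq!(padded, 2 * 2 * block_sizes::Q4_K);
    }
}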