use super::types::GGUFModel;
use crate::error::{RealizarError, Result};
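/// Normalization used inside transformer blocks: classic `LayerNorm`
/// (e.g. GPT-2) or `RmsNorm` (e.g. LLaMA-family models).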
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormType {
LayerNorm,
RmsNorm,
}
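/// Feed-forward activation function: `Gelu` or `Silu` (also known as Swish).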
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Activation {
Gelu,
Silu,
}
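/// How the architecture injects position information: learned absolute
/// embeddings, rotary embeddings (RoPE), ALiBi slopes, T5-style relative
/// biases, or none.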
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PositionalEncoding {
Absolute,
Rope,
Alibi,
Relative,
None,
}
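/// Feed-forward block shape: `GeluMlp` is the classic two-projection MLP,
/// while `SwiGlu` and `GatedMlp` add a third gate projection (see
/// `ArchConstraints::has_gate_ffn`).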
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MlpType {
GeluMlp,
SwiGlu,
GatedMlp,
}
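/// On-disk weight orientation. `Conv1D` marks GPT-2-style checkpoints whose
/// projection weights are stored transposed relative to the usual `Linear`
/// layout and therefore need `ArchConstraints::needs_transpose`.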
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WeightLayout {
Linear,
Conv1D,
}
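/// Structural choices implied by an architecture name, used to drive the
/// forward pass without per-model special casing.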
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ArchConstraints {
pub norm_type: NormType,
pub activation: Activation,
pub positional_encoding: PositionalEncoding,
pub mlp_type: MlpType,
pub weight_layout: WeightLayout,
pub has_bias: bool,
pub tied_embeddings: bool,
pub has_qk_norm: bool,
pub default_eps: f32,
}
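// `from_architecture_generated` is emitted into OUT_DIR at build time by the
// crate's build script.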
include!(concat!(env!("OUT_DIR"), "/arch_constraints_generated.rs"));
impl ArchConstraints {
#[must_use]
pub fn from_architecture(arch: &str) -> Self {
from_architecture_generated(arch)
}
#[must_use]
pub fn uses_rope(&self) -> bool {
self.positional_encoding == PositionalEncoding::Rope
}
#[must_use]
pub fn uses_rmsnorm(&self) -> bool {
self.norm_type == NormType::RmsNorm
}
#[must_use]
pub fn needs_transpose(&self) -> bool {
self.weight_layout == WeightLayout::Conv1D
}
#[must_use]
pub fn has_gate_ffn(&self) -> bool {
!matches!(self.mlp_type, MlpType::GeluMlp)
}
#[must_use]
pub fn uses_absolute_positions(&self) -> bool {
self.positional_encoding == PositionalEncoding::Absolute
}
#[must_use]
pub fn uses_relative_positions(&self) -> bool {
self.positional_encoding == PositionalEncoding::Relative
}
}
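/// Infers the GGML rope style for architectures that do not declare one:
/// `2` (NEOX style, rotating the two halves of each head) for architectures
/// known to use it, `0` (original adjacent-pair rotation) otherwise.
///
/// Matching is by lowercase substring, so broad entries such as `"qwen"`
/// also cover `"qwen2"` and `"qwen3"`; for example,
/// `infer_rope_type("Phi3ForCausalLM")` returns `2`.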
#[must_use]
pub fn infer_rope_type(arch: &str) -> u32 {
let arch_lower = arch.to_lowercase();
const NEOX_ARCHITECTURES: &[&str] = &[
"qwen",
"qwen2",
"qwen3",
"qwen3_5",
"qwen3.5",
"stablelm",
"phi2",
"phi3",
"phi",
"gemma",
"gemma2",
"gemma3",
"starcoder2",
"gptneox",
"gpt_neox",
"falcon",
"falcon_h1",
"codeshell",
"orion",
"bert",
"nomic-bert",
"dbrx",
"olmo2",
"olmoe",
"plamo",
"plamo2",
"openelm",
"exaone",
"minicpm3",
"nemotron",
"internlm2",
"deepseek",
"deepseek2",
];
    for &neox_arch in NEOX_ARCHITECTURES {
        if arch_lower.contains(neox_arch) {
            return 2;
        }
    }
0
}
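/// Model hyperparameters resolved from GGUF, APR, or safetensors metadata,
/// with per-architecture defaults filling any gaps.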
#[derive(Debug, Clone)]
pub struct GGUFConfig {
pub architecture: String,
pub constraints: ArchConstraints,
pub hidden_dim: usize,
pub num_layers: usize,
pub num_heads: usize,
pub num_kv_heads: usize,
pub vocab_size: usize,
pub intermediate_dim: usize,
pub context_length: usize,
pub rope_theta: f32,
pub eps: f32,
pub rope_type: u32,
pub explicit_head_dim: Option<usize>,
pub bos_token_id: Option<u32>,
pub eos_token_id: Option<u32>,
}
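/// Fallback BOS token id for well-known architectures, used when the model
/// file carries no tokenizer metadata. Unknown architectures get `None`.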
fn default_bos_for_architecture(arch: &str) -> Option<u32> {
match arch {
"qwen2" | "qwen3" | "qwen3moe" => Some(151_643),
"llama" => Some(128_000),
"mistral" => Some(1),
"gemma" | "gemma2" => Some(2),
"deepseek" | "deepseek2" => Some(0),
"phi3" => Some(1),
_ => None,
}
}
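/// Fallback EOS token id, mirroring `default_bos_for_architecture`.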
pub(crate) fn default_eos_for_architecture(arch: &str) -> Option<u32> {
match arch {
"qwen2" | "qwen3" | "qwen3moe" => Some(151_645),
"qwen3_5" => Some(248_044),
"llama" => Some(128_001),
"mistral" => Some(2),
"gemma" | "gemma2" => Some(1),
"deepseek" | "deepseek2" => Some(1),
"phi3" => Some(32_000),
"phi2" | "phi" | "gpt2" => Some(50_256),
_ => None,
}
}
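/// Fallback RoPE base frequency (theta) when the model file declares none.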
pub(crate) fn default_rope_theta_for_architecture(arch: &str) -> f32 {
    match arch {
        "qwen2" | "qwen3" => 1_000_000.0,
        // llama, mistral, gemma(2), the phi family, deepseek(2), and any
        // unknown architecture all use the conventional base.
        _ => 10_000.0,
    }
}
impl GGUFConfig {
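    /// Builds a config from APR metadata plus an externally counted vocab size.
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::InvalidConfiguration` if any required field
    /// (architecture, hidden_size, num_layers, num_heads, intermediate_size)
    /// is missing.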
pub fn from_apr(apr: &crate::apr::MappedAprModel, vocab_size: usize) -> Result<Self> {
let architecture = apr.metadata.architecture.clone().ok_or_else(|| {
RealizarError::InvalidConfiguration(
"C-01: APR model missing 'architecture' metadata — cannot infer model type".into(),
)
})?;
let hidden_dim = apr.metadata.hidden_size.ok_or_else(|| {
RealizarError::InvalidConfiguration(
"C-03: APR model missing 'hidden_size' metadata".into(),
)
})?;
let num_layers = apr.metadata.num_layers.ok_or_else(|| {
RealizarError::InvalidConfiguration(
"C-03: APR model missing 'num_layers' metadata".into(),
)
})?;
let num_heads = apr.metadata.num_heads.ok_or_else(|| {
RealizarError::InvalidConfiguration(
"C-03: APR model missing 'num_heads' metadata".into(),
)
})?;
let num_kv_heads = apr.metadata.num_kv_heads.unwrap_or(num_heads);
let intermediate_dim = apr.metadata.intermediate_size.ok_or_else(|| {
RealizarError::InvalidConfiguration(
"C-03: APR model missing 'intermediate_size' metadata".into(),
)
})?;
let constraints = ArchConstraints::from_architecture(&architecture);
let eps = apr.metadata.rms_norm_eps.unwrap_or(constraints.default_eps);
let rope_theta = apr
.metadata
.rope_theta
.unwrap_or_else(|| default_rope_theta_for_architecture(&architecture));
let rope_type = apr
.metadata
.rope_type
.unwrap_or_else(|| infer_rope_type(&architecture));
let context_length = apr.metadata.max_position_embeddings.unwrap_or(0);
let eos_token_id = apr
.metadata
.get_embedded_eos_token_id()
.or_else(|| default_eos_for_architecture(&architecture));
let bos_token_id = apr.metadata.get_embedded_bos_token_id();
Ok(Self {
architecture,
constraints,
vocab_size,
hidden_dim,
num_layers,
num_heads,
num_kv_heads,
intermediate_dim,
eps,
rope_theta,
rope_type,
context_length,
explicit_head_dim: None,
bos_token_id,
eos_token_id,
})
}
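    /// Per-head dimension: `explicit_head_dim` when the model declares one,
    /// otherwise `hidden_dim / num_heads` (e.g. 4096 / 32 = 128). A zero
    /// `num_heads` falls back to `hidden_dim` to avoid dividing by zero.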
#[inline]
#[must_use]
pub fn head_dim(&self) -> usize {
self.explicit_head_dim.unwrap_or(if self.num_heads > 0 {
self.hidden_dim / self.num_heads
} else {
self.hidden_dim
})
}
#[inline]
#[must_use]
pub fn q_dim(&self) -> usize {
self.num_heads * self.head_dim()
}
#[inline]
#[must_use]
pub fn kv_dim(&self) -> usize {
self.num_kv_heads * self.head_dim()
}
#[must_use]
pub fn is_encoder_decoder(&self) -> bool {
let arch = self.architecture.to_lowercase();
arch == "t5" || arch == "encoder-decoder" || arch == "whisper"
}
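    /// Detects a head dimension that differs from `hidden_dim / num_heads`,
    /// as in Gemma-style models with a wider head size. Prefers the GGUF
    /// key-length metadata, then the first dimension of
    /// `blk.0.attn_q.weight`, and returns `None` when the result matches the
    /// derived default so callers keep the cheaper derivation.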
fn infer_explicit_head_dim(
model: &GGUFModel,
hidden_dim: usize,
num_heads: usize,
) -> Option<usize> {
let default_head_dim = if num_heads > 0 {
hidden_dim / num_heads
} else {
hidden_dim
};
model
.key_length()
.or_else(|| {
model
.tensors
.iter()
.find(|t| t.name == "blk.0.attn_q.weight")
.and_then(|t| {
let d0 = t.dims.first().copied()? as usize;
if d0 > 0 && num_heads > 0 && d0.is_multiple_of(num_heads) {
Some(d0 / num_heads)
} else {
None
}
})
})
.filter(|&hd| hd != default_head_dim)
}
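    /// Infers the FFN width from tensor shapes when metadata is absent.
    /// `extract_dim` picks whichever of the first two dimensions is not
    /// `hidden_dim`, so it tolerates either storage order of
    /// `blk.0.ffn_down.weight` / `blk.0.ffn_up.weight`; the final fallback
    /// is the conventional `4 * hidden_dim`.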
fn infer_intermediate_dim(model: &GGUFModel, hidden_dim: usize) -> usize {
let extract_dim = |dims: &[u64]| -> usize {
let d0 = dims.first().copied().unwrap_or(hidden_dim as u64 * 4) as usize;
let d1 = dims.get(1).copied().unwrap_or(hidden_dim as u64) as usize;
if d1 == hidden_dim {
d0
} else if d0 == hidden_dim {
d1
} else {
d0
}
};
model
.tensors
.iter()
.find(|t| t.name == "blk.0.ffn_down.weight")
.map(|t| extract_dim(&t.dims))
.or_else(|| {
model
.tensors
.iter()
.find(|t| t.name == "blk.0.ffn_up.weight")
.map(|t| extract_dim(&t.dims))
})
.unwrap_or(hidden_dim * 4)
}
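    /// Builds a config from GGUF metadata, inferring what the file omits:
    /// vocab size from `token_embd.weight`, FFN width from tensor shapes,
    /// and BOS/EOS from per-architecture defaults.
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::InvalidShape` if the architecture,
    /// embedding length, or block count is missing.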
pub fn from_gguf(model: &GGUFModel) -> Result<Self> {
let architecture = model
.architecture()
.ok_or_else(|| RealizarError::InvalidShape {
reason: "Missing general.architecture in GGUF metadata".to_string(),
})?
.to_string();
let hidden_dim = model
.embedding_dim()
.ok_or_else(|| RealizarError::InvalidShape {
reason: "Missing embedding_length in GGUF metadata".to_string(),
})?;
let num_layers = model
.num_layers()
.ok_or_else(|| RealizarError::InvalidShape {
reason: "Missing block_count in GGUF metadata".to_string(),
})?;
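        // No head count in metadata: assume the common head_dim of 64.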
let num_heads = model.num_heads().unwrap_or(hidden_dim / 64);
let vocab_size = model
.tensors
.iter()
.find(|t| t.name == "token_embd.weight")
.and_then(|t| t.dims.first().copied())
.unwrap_or(0) as usize;
let intermediate_dim = Self::infer_intermediate_dim(model, hidden_dim);
let context_length = model.context_length().unwrap_or(0);
let rope_theta = model
.rope_freq_base()
.unwrap_or_else(|| default_rope_theta_for_architecture(&architecture));
let constraints = ArchConstraints::from_architecture(&architecture);
let eps = model.rms_epsilon().unwrap_or(constraints.default_eps);
let num_kv_heads = model.num_kv_heads().unwrap_or(num_heads);
let explicit_head_dim = Self::infer_explicit_head_dim(model, hidden_dim, num_heads);
let rope_type = model.rope_type().unwrap_or(0);
let bos_token_id = model.bos_token_id().or_else(|| {
let fallback = default_bos_for_architecture(&architecture);
if fallback.is_some() {
eprintln!(
"[BOS-FALLBACK] No tokenizer.ggml.bos_token_id in GGUF — using architecture default for '{architecture}'"
);
}
fallback
});
let eos_token_id = model
.eos_token_id()
.or_else(|| default_eos_for_architecture(&architecture));
Ok(Self {
architecture,
constraints,
hidden_dim,
num_layers,
num_heads,
num_kv_heads,
vocab_size,
intermediate_dim,
context_length,
rope_theta,
eps,
rope_type,
explicit_head_dim,
bos_token_id,
eos_token_id,
})
}
}
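/// A `GGUFConfig` that has passed structural validation: non-zero core
/// dimensions, an integral GQA ratio, and the metadata bounds checked by
/// `validate_metadata_bounds`. Construct it via `validate` or one of the
/// `from_*` loaders; `Deref` gives read access to the inner config.
///
/// A minimal usage sketch (not compiled as a doctest, since it assumes an
/// already-parsed `model`):
///
/// ```ignore
/// let config = ValidatedModelConfig::from_gguf(&model)?;
/// assert!(config.head_dim() > 0);
/// let gqa_ratio = config.num_heads() / config.num_kv_heads();
/// ```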
#[derive(Debug, Clone)]
pub struct ValidatedModelConfig {
inner: GGUFConfig,
}
impl ValidatedModelConfig {
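    /// Checks structural invariants: every core dimension non-zero,
    /// `hidden_dim` divisible by `num_heads` when `head_dim` is derived,
    /// `num_heads` divisible by `num_kv_heads`, and the
    /// `model-metadata-bounds-v1` limits.
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::InvalidShape` naming the first violated
    /// invariant.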
pub fn validate(config: GGUFConfig) -> Result<Self> {
if config.hidden_dim == 0 {
return Err(RealizarError::InvalidShape {
reason: "hidden_dim must be > 0".to_string(),
});
}
if config.num_layers == 0 {
return Err(RealizarError::InvalidShape {
reason: "num_layers must be > 0".to_string(),
});
}
if config.vocab_size == 0 {
return Err(RealizarError::InvalidShape {
reason: "vocab_size must be > 0".to_string(),
});
}
if config.num_heads == 0 {
return Err(RealizarError::InvalidShape {
reason: "num_heads must be > 0".to_string(),
});
}
if config.num_kv_heads == 0 {
return Err(RealizarError::InvalidShape {
reason: "num_kv_heads must be > 0".to_string(),
});
}
if config.intermediate_dim == 0 {
return Err(RealizarError::InvalidShape {
reason: "intermediate_dim must be > 0".to_string(),
});
}
if config.explicit_head_dim.is_none() && !config.hidden_dim.is_multiple_of(config.num_heads)
{
return Err(RealizarError::InvalidShape {
reason: format!(
"hidden_dim ({}) must be divisible by num_heads ({}) when head_dim is derived",
config.hidden_dim, config.num_heads
),
});
}
if config.head_dim() == 0 {
return Err(RealizarError::InvalidShape {
reason: "head_dim must be > 0".to_string(),
});
}
if !config.num_heads.is_multiple_of(config.num_kv_heads) {
return Err(RealizarError::InvalidShape {
reason: format!(
"num_heads ({}) must be divisible by num_kv_heads ({}) — GQA ratio must be an integer",
config.num_heads, config.num_kv_heads
),
});
}
validate_metadata_bounds(&config)?;
Ok(Self { inner: config })
}
pub fn from_gguf(model: &GGUFModel) -> Result<Self> {
let config = GGUFConfig::from_gguf(model)?;
Self::validate(config)
}
pub fn from_apr(apr: &crate::apr::MappedAprModel, vocab_size: usize) -> Result<Self> {
let config = GGUFConfig::from_apr(apr, vocab_size)?;
Self::validate(config)
}
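    /// Builds and validates a config from HuggingFace-style `config.json`
    /// fields. Note that, unlike the GGUF/APR paths, a missing
    /// `rms_norm_eps` defaults to `1e-6` here rather than the architecture's
    /// `default_eps`, and the rope type is always inferred from the
    /// architecture name.
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::InvalidShape` for missing required fields or
    /// any failed validation invariant.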
pub fn from_safetensors_config(config: &crate::SafetensorsConfig) -> Result<Self> {
let hidden_dim = config
.hidden_size
.ok_or_else(|| RealizarError::InvalidShape {
reason: "config.json missing hidden_size".to_string(),
})?;
let num_layers = config
.num_hidden_layers
.ok_or_else(|| RealizarError::InvalidShape {
reason: "config.json missing num_hidden_layers".to_string(),
})?;
let num_heads = config
.num_attention_heads
.ok_or_else(|| RealizarError::InvalidShape {
reason: "config.json missing num_attention_heads".to_string(),
})?;
let num_kv_heads = config.num_kv_heads();
let vocab_size = config
.vocab_size
.ok_or_else(|| RealizarError::InvalidShape {
reason: "config.json missing vocab_size".to_string(),
})?;
let intermediate_dim = config.intermediate_size.unwrap_or(hidden_dim * 4);
let context_length = config.max_position_embeddings.unwrap_or(0);
let architecture = config.architecture();
let rope_theta = config
.rope_theta
.unwrap_or_else(|| default_rope_theta_for_architecture(&architecture));
let eps = config.rms_norm_eps.unwrap_or(1e-6);
let constraints = ArchConstraints::from_architecture(&architecture);
let rope_type = infer_rope_type(&architecture);
let raw = GGUFConfig {
architecture,
constraints,
hidden_dim,
num_layers,
num_heads,
num_kv_heads,
vocab_size,
intermediate_dim,
context_length,
rope_theta,
eps,
rope_type,
explicit_head_dim: None,
bos_token_id: config.bos_token_id,
eos_token_id: config.eos_token_id,
};
Self::validate(raw)
}
#[must_use]
pub fn architecture(&self) -> &str {
&self.inner.architecture
}
#[must_use]
pub fn constraints(&self) -> &ArchConstraints {
&self.inner.constraints
}
#[must_use]
pub fn hidden_dim(&self) -> usize {
self.inner.hidden_dim
}
#[must_use]
pub fn num_layers(&self) -> usize {
self.inner.num_layers
}
#[must_use]
pub fn num_heads(&self) -> usize {
self.inner.num_heads
}
#[must_use]
pub fn num_kv_heads(&self) -> usize {
self.inner.num_kv_heads
}
#[must_use]
pub fn vocab_size(&self) -> usize {
self.inner.vocab_size
}
#[must_use]
pub fn intermediate_dim(&self) -> usize {
self.inner.intermediate_dim
}
#[must_use]
pub fn context_length(&self) -> usize {
self.inner.context_length
}
#[must_use]
pub fn rope_theta(&self) -> f32 {
self.inner.rope_theta
}
#[must_use]
pub fn eps(&self) -> f32 {
self.inner.eps
}
#[must_use]
pub fn rope_type(&self) -> u32 {
self.inner.rope_type
}
#[must_use]
pub fn bos_token_id(&self) -> Option<u32> {
self.inner.bos_token_id
}
#[must_use]
pub fn eos_token_id(&self) -> Option<u32> {
self.inner.eos_token_id
}
#[must_use]
pub fn head_dim(&self) -> usize {
self.inner.head_dim()
}
#[must_use]
pub fn q_dim(&self) -> usize {
self.inner.q_dim()
}
#[must_use]
pub fn kv_dim(&self) -> usize {
self.inner.kv_dim()
}
#[must_use]
pub fn config(&self) -> &GGUFConfig {
&self.inner
}
#[must_use]
pub fn into_inner(self) -> GGUFConfig {
self.inner
}
}
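/// Read-only passthrough so a validated config exposes every `GGUFConfig`
/// field and method directly.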
impl std::ops::Deref for ValidatedModelConfig {
type Target = GGUFConfig;
fn deref(&self) -> &GGUFConfig {
&self.inner
}
}
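/// Enforces the `model-metadata-bounds-v1` sanity limits. Note that
/// non-positive `rope_theta` and `eps` values pass through unchanged; only
/// positive but implausible values are rejected.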
fn validate_metadata_bounds(config: &GGUFConfig) -> Result<()> {
check_usize_max(config.hidden_dim, 65_536, "hidden_dim")?;
check_usize_max(config.num_layers, 256, "num_layers")?;
check_usize_max(config.num_heads, 256, "num_heads")?;
check_usize_max(config.num_kv_heads, 256, "num_kv_heads")?;
check_usize_max(config.vocab_size, 1_000_000, "vocab_size")?;
check_usize_max(config.intermediate_dim, 262_144, "intermediate_dim")?;
check_usize_max(config.context_length, 2_097_152, "context_length")?;
if config.rope_theta > 0.0 && config.rope_theta < 1.0 {
return Err(RealizarError::InvalidShape {
reason: format!(
"rope_theta {} below minimum 1.0 (model-metadata-bounds-v1)",
config.rope_theta
),
});
}
if config.rope_theta > 100_000_000.0 {
return Err(RealizarError::InvalidShape {
reason: format!(
"rope_theta {} exceeds max 100000000.0 (model-metadata-bounds-v1)",
config.rope_theta
),
});
}
if config.eps > 0.0 && config.eps < 1e-10 {
return Err(RealizarError::InvalidShape {
reason: format!(
"eps {} below minimum 1e-10 (model-metadata-bounds-v1)",
config.eps
),
});
}
if config.eps > 0.01 {
return Err(RealizarError::InvalidShape {
reason: format!(
"eps {} exceeds max 0.01 (model-metadata-bounds-v1)",
config.eps
),
});
}
Ok(())
}
fn check_usize_max(value: usize, max: usize, field: &str) -> Result<()> {
if value > max {
return Err(RealizarError::InvalidShape {
reason: format!("{field} {value} exceeds max {max} (model-metadata-bounds-v1)"),
});
}
Ok(())
}
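// Additional items for this module are textually included from a sibling
// source file.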
include!("config_validated.rs");