/// Returns the architectural constraints implied by a model-architecture
/// identifier string (e.g. `"llama"`, `"gpt2"`, `"bert"`).
///
/// Unknown architecture strings fall back to the modern llama-style
/// defaults (RMSNorm + SiLU + RoPE + SwiGLU, no biases, eps 1e-5).
#[must_use]
fn from_architecture_generated(arch: &str) -> ArchConstraints {
    // Almost every architecture is a small delta on one of two baselines,
    // so build those once and override only the fields that differ per
    // family (functional record update keeps each arm to its true delta).

    // Modern decoder baseline: RMSNorm + SiLU + RoPE + SwiGLU, linear
    // (non-transposed) weights, no biases, untied embeddings, no QK-norm.
    let llama_like = |default_eps| ArchConstraints {
        norm_type: NormType::RmsNorm,
        activation: Activation::Silu,
        positional_encoding: PositionalEncoding::Rope,
        mlp_type: MlpType::SwiGlu,
        weight_layout: WeightLayout::Linear,
        has_bias: false,
        tied_embeddings: false,
        has_qk_norm: false,
        default_eps,
    };

    // Classic transformer baseline: LayerNorm + GELU + plain GELU MLP,
    // biased linear weights, untied embeddings; positional encoding varies.
    let gelu_like = |positional_encoding, default_eps| ArchConstraints {
        norm_type: NormType::LayerNorm,
        activation: Activation::Gelu,
        positional_encoding,
        mlp_type: MlpType::GeluMlp,
        weight_layout: WeightLayout::Linear,
        has_bias: true,
        tied_embeddings: false,
        has_qk_norm: false,
        default_eps,
    };

    match arch {
        // GPT-2 stores projection weights transposed (Conv1D) and ties the
        // input/output embeddings.
        "gpt2" => ArchConstraints {
            weight_layout: WeightLayout::Conv1D,
            tied_embeddings: true,
            ..gelu_like(PositionalEncoding::Absolute, 1e-5)
        },
        "llama" | "llama3" | "mistral" => llama_like(1e-5),
        // Qwen2 family adds attention biases on top of the llama recipe.
        "qwen2" | "qwen2.5" | "qwen" => ArchConstraints {
            has_bias: true,
            ..llama_like(1e-6)
        },
        // Qwen3 (dense and MoE) normalizes Q/K before attention.
        "qwen3" | "qwen3_moe" | "qwen3_5_moe" => ArchConstraints {
            has_qk_norm: true,
            ..llama_like(1e-6)
        },
        // Plain llama recipe with a tighter epsilon.
        // NOTE(review): "qwen3_5"/"qwen3.5" lack has_qk_norm while "qwen3"
        // and "qwen3_5_moe" set it — confirm this asymmetry is intentional.
        "deepseek" | "deepseek2" | "qwen3_5" | "qwen3.5" | "falcon_h1"
        | "falcon-h1" | "openelm" => llama_like(1e-6),
        "phi2" => gelu_like(PositionalEncoding::Rope, 1e-5),
        // NOTE(review): HF Phi-3 configs use RMSNorm without linear biases;
        // this entry keeps LayerNorm + biases — verify against the loader
        // before relying on the "phi3" mapping.
        "phi" | "phi3" => ArchConstraints {
            activation: Activation::Silu,
            mlp_type: MlpType::SwiGlu,
            ..gelu_like(PositionalEncoding::Rope, 1e-5)
        },
        // Gemma: RMSNorm like llama, but a GELU-gated MLP and tied vocab.
        "gemma" | "gemma2" => ArchConstraints {
            activation: Activation::Gelu,
            mlp_type: MlpType::GatedMlp,
            tied_embeddings: true,
            ..llama_like(1e-6)
        },
        "bert" => ArchConstraints {
            tied_embeddings: true,
            ..gelu_like(PositionalEncoding::Absolute, 1e-12)
        },
        "whisper" => gelu_like(PositionalEncoding::Absolute, 1e-5),
        // T5: relative position biases, bias-free linears, tied vocab.
        "t5" | "encoder-decoder" => ArchConstraints {
            has_bias: false,
            tied_embeddings: true,
            ..gelu_like(PositionalEncoding::Relative, 1e-6)
        },
        // State-space model: position-free by construction.
        "mamba" => ArchConstraints {
            positional_encoding: PositionalEncoding::None,
            tied_embeddings: true,
            ..llama_like(1e-5)
        },
        "moonshine" => ArchConstraints {
            norm_type: NormType::LayerNorm,
            mlp_type: MlpType::GatedMlp,
            tied_embeddings: true,
            ..llama_like(1e-5)
        },
        // RWKV: recurrent, no positional encoding, bias-free GELU stack.
        "rwkv7" | "rwkv" => ArchConstraints {
            has_bias: false,
            ..gelu_like(PositionalEncoding::None, 1e-5)
        },
        // Unknown architectures: assume the modern llama-style defaults.
        _ => llama_like(1e-5),
    }
}