use serde::{Deserialize, Serialize};
use trustformers_core::traits::Config;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YiConfig {
pub vocab_size: usize,
pub hidden_size: usize,
pub intermediate_size: usize,
pub num_hidden_layers: usize,
pub num_attention_heads: usize,
pub num_key_value_heads: usize,
pub rope_theta: f64,
pub rms_norm_eps: f64,
pub max_position_embeddings: usize,
pub tie_word_embeddings: bool,
}
impl Default for YiConfig {
fn default() -> Self {
Self::yi_6b()
}
}
impl Config for YiConfig {
fn validate(&self) -> trustformers_core::errors::Result<()> {
use trustformers_core::errors::TrustformersError;
if self.hidden_size == 0 {
return Err(TrustformersError::invalid_config(
"hidden_size must be > 0".to_string(),
));
}
if !self.hidden_size.is_multiple_of(self.num_attention_heads) {
return Err(TrustformersError::invalid_config(
"hidden_size must be divisible by num_attention_heads".to_string(),
));
}
if !self.num_attention_heads.is_multiple_of(self.num_key_value_heads) {
return Err(TrustformersError::invalid_config(
"num_attention_heads must be divisible by num_key_value_heads".to_string(),
));
}
if self.vocab_size == 0 {
return Err(TrustformersError::invalid_config(
"vocab_size must be > 0".to_string(),
));
}
if self.num_hidden_layers == 0 {
return Err(TrustformersError::invalid_config(
"num_hidden_layers must be > 0".to_string(),
));
}
if self.intermediate_size == 0 {
return Err(TrustformersError::invalid_config(
"intermediate_size must be > 0".to_string(),
));
}
Ok(())
}
fn architecture(&self) -> &'static str {
"Yi-1.5"
}
}
impl YiConfig {
pub fn head_dim(&self) -> usize {
self.hidden_size / self.num_attention_heads
}
pub fn num_query_groups(&self) -> usize {
self.num_attention_heads / self.num_key_value_heads
}
pub fn yi_6b() -> Self {
Self {
vocab_size: 64000,
hidden_size: 4096,
intermediate_size: 11008,
num_hidden_layers: 32,
num_attention_heads: 32,
num_key_value_heads: 4,
rope_theta: 5_000_000.0,
rms_norm_eps: 1e-5,
max_position_embeddings: 4096,
tie_word_embeddings: true,
}
}
pub fn yi_9b() -> Self {
Self {
vocab_size: 64000,
hidden_size: 4096,
intermediate_size: 14336,
num_hidden_layers: 48,
num_attention_heads: 32,
num_key_value_heads: 4,
rope_theta: 5_000_000.0,
rms_norm_eps: 1e-5,
max_position_embeddings: 4096,
tie_word_embeddings: true,
}
}
pub fn yi_34b() -> Self {
Self {
vocab_size: 64000,
hidden_size: 7168,
intermediate_size: 20480,
num_hidden_layers: 60,
num_attention_heads: 56,
num_key_value_heads: 8,
rope_theta: 5_000_000.0,
rms_norm_eps: 1e-5,
max_position_embeddings: 4096,
tie_word_embeddings: true,
}
}
pub fn yi_6b_200k() -> Self {
Self {
max_position_embeddings: 200000,
..Self::yi_6b()
}
}
pub fn small_test() -> Self {
Self {
vocab_size: 512,
hidden_size: 64,
intermediate_size: 128,
num_hidden_layers: 2,
num_attention_heads: 4,
num_key_value_heads: 2,
rope_theta: 5_000_000.0,
rms_norm_eps: 1e-5,
max_position_embeddings: 64,
tie_word_embeddings: true,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use trustformers_core::traits::Config;
#[test]
fn test_default_is_6b() {
let cfg = YiConfig::default();
assert_eq!(cfg.vocab_size, 64000);
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.num_hidden_layers, 32);
}
#[test]
fn test_6b_preset() {
let cfg = YiConfig::yi_6b();
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.intermediate_size, 11008);
assert_eq!(cfg.num_attention_heads, 32);
assert_eq!(cfg.num_key_value_heads, 4);
assert_eq!(cfg.max_position_embeddings, 4096);
assert!(cfg.tie_word_embeddings);
}
#[test]
fn test_9b_preset() {
let cfg = YiConfig::yi_9b();
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.num_hidden_layers, 48);
assert_eq!(cfg.num_key_value_heads, 4);
}
#[test]
fn test_34b_preset() {
let cfg = YiConfig::yi_34b();
assert_eq!(cfg.hidden_size, 7168);
assert_eq!(cfg.num_hidden_layers, 60);
assert_eq!(cfg.num_attention_heads, 56);
assert_eq!(cfg.num_key_value_heads, 8);
}
#[test]
fn test_6b_200k_long_context() {
let cfg = YiConfig::yi_6b_200k();
assert_eq!(cfg.max_position_embeddings, 200000);
assert_eq!(cfg.hidden_size, 4096);
}
#[test]
fn test_head_dim_6b() {
assert_eq!(YiConfig::yi_6b().head_dim(), 128);
}
#[test]
fn test_head_dim_34b() {
assert_eq!(YiConfig::yi_34b().head_dim(), 128);
}
#[test]
fn test_num_query_groups_6b() {
assert_eq!(YiConfig::yi_6b().num_query_groups(), 8);
}
#[test]
fn test_rope_theta_is_5m() {
assert!((YiConfig::yi_6b().rope_theta - 5_000_000.0).abs() < 1.0);
}
#[test]
fn test_tie_word_embeddings_true() {
assert!(YiConfig::yi_6b().tie_word_embeddings);
assert!(YiConfig::yi_34b().tie_word_embeddings);
}
#[test]
fn test_architecture_label() {
assert_eq!(YiConfig::default().architecture(), "Yi-1.5");
}
#[test]
fn test_validate_6b_ok() {
assert!(YiConfig::yi_6b().validate().is_ok());
}
#[test]
fn test_validate_34b_ok() {
assert!(YiConfig::yi_34b().validate().is_ok());
}
#[test]
fn test_validate_small_test_ok() {
assert!(YiConfig::small_test().validate().is_ok());
}
#[test]
fn test_validate_zero_hidden_size() {
let mut cfg = YiConfig::small_test();
cfg.hidden_size = 0;
assert!(cfg.validate().is_err());
}
#[test]
fn test_validate_hidden_not_divisible_by_heads() {
let mut cfg = YiConfig::small_test();
cfg.hidden_size = 65;
assert!(cfg.validate().is_err());
}
#[test]
fn test_validate_heads_not_divisible_by_kv_heads() {
let mut cfg = YiConfig::small_test();
cfg.num_key_value_heads = 3;
assert!(cfg.validate().is_err());
}
#[test]
fn test_validate_zero_vocab_size() {
let mut cfg = YiConfig::small_test();
cfg.vocab_size = 0;
assert!(cfg.validate().is_err());
}
#[test]
fn test_validate_zero_layers() {
let mut cfg = YiConfig::small_test();
cfg.num_hidden_layers = 0;
assert!(cfg.validate().is_err());
}
#[test]
fn test_clone_preserves_tie_word_embeddings() {
let cfg = YiConfig::yi_6b();
let cloned = cfg.clone();
assert_eq!(cfg.tie_word_embeddings, cloned.tie_word_embeddings);
assert_eq!(cfg.vocab_size, cloned.vocab_size);
}
#[test]
fn test_lcg_varied_context_lengths() {
let mut s = 37u64;
for _ in 0..4 {
s = s.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
let ctx = ((s % 8192) + 64) as usize;
let mut cfg = YiConfig::small_test();
cfg.max_position_embeddings = ctx;
assert!(cfg.validate().is_ok(), "ctx={ctx} failed");
}
}
}