use serde::{Deserialize, Serialize};
use trustformers_core::traits::Config;
/// Hyperparameters describing a Phi-2 model.
///
/// Ready-made instances come from `Phi2Config::default()`,
/// `Phi2Config::phi2_2_7b()` and `Phi2Config::small_test()`. The struct can be
/// serialized and deserialized through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Phi2Config {
/// Vocabulary size (presumably the number of token ids — confirm against the
/// model code). `validate` rejects 0. The 2.7B preset uses 51200.
pub vocab_size: usize,
/// Hidden width. `validate` rejects 0 and requires it to be divisible by
/// `num_attention_heads`. The 2.7B preset uses 2560.
pub hidden_size: usize,
/// Intermediate width (presumably of the feed-forward block — confirm).
/// `validate` rejects 0. The 2.7B preset uses 10240, i.e. 4x `hidden_size`.
pub intermediate_size: usize,
/// Number of hidden layers. `validate` rejects 0. The 2.7B preset uses 32.
pub num_hidden_layers: usize,
/// Number of attention heads. Must divide `hidden_size`; `head_dim()` is
/// `hidden_size / num_attention_heads`. The 2.7B preset uses 32.
pub num_attention_heads: usize,
/// Maximum number of position embeddings. Not checked by `validate`.
/// The 2.7B preset uses 2048.
pub max_position_embeddings: usize,
/// Presumably the rotary position embedding base — not consumed in this
/// file, confirm against the attention code. Presets use 10000.0.
pub rope_theta: f64,
/// Presumably the epsilon used by layer normalization — not consumed in this
/// file. Presets use 1e-5.
pub layer_norm_eps: f64,
/// Presumably the spread used for weight initialization — not consumed in
/// this file. Presets use 0.02.
pub initializer_range: f64,
}
impl Default for Phi2Config {
    /// Returns the Phi-2 2.7B configuration.
    ///
    /// Delegates to [`Phi2Config::phi2_2_7b`] so the default and the named
    /// preset are defined in exactly one place and cannot drift apart.
    fn default() -> Self {
        Self::phi2_2_7b()
    }
}
impl Config for Phi2Config {
    /// Checks that the structural dimensions describe a usable model.
    ///
    /// Rules are evaluated in the order below and the first violated one
    /// determines the error:
    ///
    /// 1. `hidden_size` is non-zero
    /// 2. `num_attention_heads` is non-zero
    /// 3. `hidden_size` is divisible by `num_attention_heads`
    /// 4. `vocab_size` is non-zero
    /// 5. `num_hidden_layers` is non-zero
    /// 6. `intermediate_size` is non-zero
    ///
    /// # Errors
    ///
    /// Returns `TrustformersError::invalid_config` carrying the message of the
    /// first violated rule.
    fn validate(&self) -> trustformers_core::errors::Result<()> {
        use trustformers_core::errors::TrustformersError;

        // Each entry pairs "is this rule violated?" with its error message.
        // Evaluating every condition up front is safe: `is_multiple_of` does
        // not panic on a zero divisor (it yields `false` for a non-zero
        // receiver), and the explicit zero-heads rule sits before it so that
        // case is reported with an accurate message instead of "not divisible".
        let rules = [
            (self.hidden_size == 0, "hidden_size must be greater than 0"),
            (
                self.num_attention_heads == 0,
                "num_attention_heads must be greater than 0",
            ),
            (
                !self.hidden_size.is_multiple_of(self.num_attention_heads),
                "hidden_size must be divisible by num_attention_heads",
            ),
            (self.vocab_size == 0, "vocab_size must be greater than 0"),
            (
                self.num_hidden_layers == 0,
                "num_hidden_layers must be greater than 0",
            ),
            (
                self.intermediate_size == 0,
                "intermediate_size must be greater than 0",
            ),
        ];

        match rules.iter().find(|(violated, _)| *violated) {
            Some((_, message)) => Err(TrustformersError::invalid_config(
                (*message).to_string(),
            )),
            None => Ok(()),
        }
    }

    /// Returns the architecture label, `"Phi-2"`.
    fn architecture(&self) -> &'static str {
        "Phi-2"
    }
}
impl Phi2Config {
pub fn head_dim(&self) -> usize {
self.hidden_size / self.num_attention_heads
}
pub fn phi2_2_7b() -> Self {
Self {
vocab_size: 51200,
hidden_size: 2560,
intermediate_size: 10240,
num_hidden_layers: 32,
num_attention_heads: 32,
max_position_embeddings: 2048,
rope_theta: 10000.0,
layer_norm_eps: 1e-5,
initializer_range: 0.02,
}
}
pub fn small_test() -> Self {
Self {
vocab_size: 256,
hidden_size: 64,
intermediate_size: 256,
num_hidden_layers: 2,
num_attention_heads: 4,
max_position_embeddings: 64,
rope_theta: 10000.0,
layer_norm_eps: 1e-5,
initializer_range: 0.02,
}
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for the Phi-2 presets and for `Config::validate`.

    use super::*;
    use trustformers_core::traits::Config;

    /// Starts from the small test preset, applies `mutate`, and asserts that
    /// validation rejects the result.
    fn assert_rejected(mutate: impl FnOnce(&mut Phi2Config)) {
        let mut cfg = Phi2Config::small_test();
        mutate(&mut cfg);
        assert!(cfg.validate().is_err());
    }

    #[test]
    fn test_default_is_phi2_2_7b() {
        let cfg = Phi2Config::default();
        assert_eq!(cfg.vocab_size, 51200);
        assert_eq!(cfg.hidden_size, 2560);
        assert_eq!(cfg.num_hidden_layers, 32);
        // The derived `Debug` output lists every field, so this pins the whole
        // default against the named preset rather than a hand-picked subset.
        assert_eq!(format!("{cfg:?}"), format!("{:?}", Phi2Config::phi2_2_7b()));
    }

    #[test]
    fn test_phi2_2_7b_preset() {
        let cfg = Phi2Config::phi2_2_7b();
        assert_eq!(cfg.hidden_size, 2560);
        assert_eq!(cfg.intermediate_size, 10240);
        assert_eq!(cfg.num_hidden_layers, 32);
        assert_eq!(cfg.num_attention_heads, 32);
        assert_eq!(cfg.max_position_embeddings, 2048);
    }

    #[test]
    fn test_small_test_preset() {
        let cfg = Phi2Config::small_test();
        assert_eq!(cfg.vocab_size, 256);
        assert_eq!(cfg.hidden_size, 64);
        assert_eq!(cfg.num_hidden_layers, 2);
        assert_eq!(cfg.num_attention_heads, 4);
    }

    #[test]
    fn test_head_dim_2_7b() {
        assert_eq!(Phi2Config::phi2_2_7b().head_dim(), 80);
    }

    #[test]
    fn test_head_dim_small() {
        assert_eq!(Phi2Config::small_test().head_dim(), 16);
    }

    #[test]
    fn test_rope_theta_is_10000() {
        assert!((Phi2Config::default().rope_theta - 10000.0).abs() < 1e-3);
    }

    #[test]
    fn test_initializer_range_positive() {
        assert!(Phi2Config::default().initializer_range > 0.0);
    }

    #[test]
    fn test_architecture_label() {
        assert_eq!(Phi2Config::default().architecture(), "Phi-2");
    }

    #[test]
    fn test_intermediate_is_4x_hidden() {
        let cfg = Phi2Config::phi2_2_7b();
        assert_eq!(cfg.intermediate_size, 4 * cfg.hidden_size);
    }

    #[test]
    fn test_validate_default_ok() {
        assert!(Phi2Config::default().validate().is_ok());
    }

    #[test]
    fn test_validate_small_test_ok() {
        assert!(Phi2Config::small_test().validate().is_ok());
    }

    #[test]
    fn test_validate_zero_hidden_size() {
        assert_rejected(|cfg| cfg.hidden_size = 0);
    }

    #[test]
    fn test_validate_hidden_not_divisible_by_heads() {
        // 65 is not a multiple of the preset's 4 heads.
        assert_rejected(|cfg| cfg.hidden_size = 65);
    }

    #[test]
    fn test_validate_zero_attention_heads() {
        // A zero head count would make `head_dim()` divide by zero, so it
        // must never pass validation.
        assert_rejected(|cfg| cfg.num_attention_heads = 0);
    }

    #[test]
    fn test_validate_zero_vocab_size() {
        assert_rejected(|cfg| cfg.vocab_size = 0);
    }

    #[test]
    fn test_validate_zero_layers() {
        assert_rejected(|cfg| cfg.num_hidden_layers = 0);
    }

    #[test]
    fn test_validate_zero_intermediate_size() {
        assert_rejected(|cfg| cfg.intermediate_size = 0);
    }

    #[test]
    fn test_clone_preserves_all_fields() {
        let cfg = Phi2Config::phi2_2_7b();
        let cloned = cfg.clone();
        assert_eq!(cfg.vocab_size, cloned.vocab_size);
        assert_eq!(cfg.rope_theta, cloned.rope_theta);
        // Derived `Debug` prints every field, so equal output means no field
        // was lost or altered by the clone.
        assert_eq!(format!("{cfg:?}"), format!("{cloned:?}"));
    }

    #[test]
    fn test_lcg_varied_position_embeddings() {
        // Deterministic pseudo-random sweep: a 64-bit LCG with a fixed seed
        // yields five `max_position_embeddings` values in 64..=1087.
        let mut state = 31u64;
        for _ in 0..5 {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            let max_pos = ((state % 1024) + 64) as usize;
            let mut cfg = Phi2Config::small_test();
            cfg.max_position_embeddings = max_pos;
            assert!(cfg.validate().is_ok(), "max_pos={max_pos} failed");
        }
    }
}