pub mod config;
pub mod model;
pub mod tasks;
pub use config::{Phi4Config, Phi4RopeScaling};
pub use model::{
Phi4Attention, Phi4DecoderLayer, Phi4MLP, Phi4Model, Phi4RmsNorm, Phi4RotaryEmbedding,
};
pub use tasks::{Phi4Error, Phi4ForCausalLM};
#[cfg(test)]
mod tests {
use super::*;
/// Pins the field values produced by `Phi4Config::default()`.
#[test]
fn test_phi4_config_defaults() {
    let Phi4Config {
        vocab_size,
        hidden_size,
        intermediate_size,
        num_hidden_layers,
        num_attention_heads,
        num_key_value_heads,
        head_dim,
        max_position_embeddings,
        original_max_position_embeddings,
        rms_norm_eps,
        ..
    } = Phi4Config::default();

    assert_eq!(vocab_size, 100352);
    assert_eq!(hidden_size, 5120);
    assert_eq!(intermediate_size, 17920);
    assert_eq!(num_hidden_layers, 40);
    assert_eq!(num_attention_heads, 40);
    assert_eq!(num_key_value_heads, 10);
    assert_eq!(head_dim, 128);
    assert_eq!(max_position_embeddings, 16384);
    assert_eq!(original_max_position_embeddings, 4096);
    // Floating-point field: compared with a tolerance instead of `==`.
    assert!((rms_norm_eps - 1e-5).abs() < 1e-10);
}
/// Verifies the dimensions and flags reported by the `Phi4Config::phi4_14b()` preset.
#[test]
fn test_phi4_14b_preset() {
    let preset = Phi4Config::phi4_14b();

    // Size-related fields.
    assert_eq!(preset.vocab_size, 100352);
    assert_eq!(preset.num_hidden_layers, 40);
    assert_eq!(preset.hidden_size, 5120);
    assert_eq!(preset.intermediate_size, 17920);

    // Attention layout.
    assert_eq!(preset.num_attention_heads, 40);
    assert_eq!(preset.num_key_value_heads, 10);
    assert_eq!(preset.head_dim, 128);

    // Embeddings are tied and this preset carries no RoPE scaling block.
    assert!(preset.tie_word_embeddings);
    assert!(preset.rope_scaling.is_none());
}
/// Verifies the layer, head, and vocabulary sizes of the `Phi4Config::phi4_mini()` preset.
#[test]
fn test_phi4_mini_preset() {
    let Phi4Config {
        hidden_size,
        num_hidden_layers,
        num_attention_heads,
        num_key_value_heads,
        intermediate_size,
        vocab_size,
        ..
    } = Phi4Config::phi4_mini();

    assert_eq!(hidden_size, 3072);
    assert_eq!(num_hidden_layers, 32);
    assert_eq!(num_attention_heads, 32);
    assert_eq!(num_key_value_heads, 8);
    assert_eq!(intermediate_size, 8192);
    assert_eq!(vocab_size, 100352);
}
/// Checks the `Phi4Config::phi4_14b_longrope()` preset end to end: a scaling block must
/// be present, be tagged `"longrope"`, carry non-empty `short_factor` / `long_factor`
/// lists, and report 4096 original positions against a 131072-position maximum.
#[test]
fn test_phi4_longrope_config() {
    let cfg = Phi4Config::phi4_14b_longrope();
    // `expect` checks presence and borrows the value in one step, and fails with a
    // descriptive message (the same one the other LongRoPE tests use) if it is absent.
    let rs = cfg
        .rope_scaling
        .as_ref()
        .expect("rope_scaling must be Some");

    assert_eq!(rs.rope_type, "longrope");
    assert!(!rs.short_factor.is_empty());
    assert!(!rs.long_factor.is_empty());
    assert_eq!(rs.original_max_position_embeddings, 4096);
    assert_eq!(cfg.max_position_embeddings, 131072);
}
/// Both the 14B and mini presets are expected to report `tie_word_embeddings == true`.
#[test]
fn test_phi4_tied_embeddings() {
    let tied_14b = Phi4Config::phi4_14b().tie_word_embeddings;
    assert!(tied_14b, "Phi-4 14B must have tied embeddings");

    let tied_mini = Phi4Config::phi4_mini().tie_word_embeddings;
    assert!(tied_mini, "Phi-4 mini must have tied embeddings");
}
/// `gqa_ratio()` on the 14B preset must be 4, and must agree with dividing the
/// attention-head count by the key/value-head count directly.
#[test]
fn test_phi4_gqa_ratio() {
    let preset = Phi4Config::phi4_14b();
    assert_eq!(preset.gqa_ratio(), 4);

    let heads_per_kv_head = preset.num_attention_heads / preset.num_key_value_heads;
    assert_eq!(heads_per_kv_head, 4);
}
/// The unmodified 14B preset must pass `validate()`.
#[test]
fn test_phi4_validate_ok() {
    let preset = Phi4Config::phi4_14b();
    let outcome = preset.validate();
    assert!(outcome.is_ok());
}
/// `validate()` must reject the 14B preset once `num_key_value_heads` is set to 7.
#[test]
fn test_phi4_validate_bad_kv_heads() {
    // NOTE(review): 7 does not evenly divide the preset's 40 attention heads, which is
    // presumably the condition `validate` rejects — confirm against its implementation.
    let cfg = Phi4Config {
        num_key_value_heads: 7,
        ..Phi4Config::phi4_14b()
    };

    let result = cfg.validate();
    assert!(result.is_err());
}
/// The default configuration must report `rope_theta` within 1.0 of 250000.
#[test]
fn test_phi4_rope_theta() {
    let cfg = Phi4Config::default();
    let deviation = (cfg.rope_theta - 250000.0).abs();
    assert!(
        deviation < 1.0,
        "rope_theta should be 250000, got {}",
        cfg.rope_theta
    );
}
/// The 14B preset must report a vocabulary of 100352 entries.
#[test]
fn test_phi4_vocab_size() {
    let vocab_size = Phi4Config::phi4_14b().vocab_size;
    assert_eq!(vocab_size, 100352);
}
/// Builds a small, fully specified `Phi4Config` (2 layers, hidden size 64, 4 attention
/// heads over 2 key/value heads) that tests use as a base to mutate or to build a model.
fn small_phi4_config() -> Phi4Config {
    Phi4Config {
        // Vocabulary and layer sizes.
        vocab_size: 256,
        hidden_size: 64,
        intermediate_size: 128,
        num_hidden_layers: 2,
        // Attention layout: 4 heads * head_dim 16 == hidden_size 64.
        num_attention_heads: 4,
        num_key_value_heads: 2,
        head_dim: 16,
        // Positional settings; no RoPE scaling block.
        max_position_embeddings: 512,
        original_max_position_embeddings: 256,
        rope_theta: 250000.0,
        rope_scaling: None,
        // Normalisation, activation, and regularisation.
        rms_norm_eps: 1e-5,
        hidden_act: String::from("silu"),
        attention_dropout: 0.0,
        embd_dropout: 0.0,
        tie_word_embeddings: true,
    }
}
/// The 14B preset must report `rope_theta` within 1.0 of 250000.
#[test]
fn test_phi4_14b_rope_theta() {
    let preset = Phi4Config::phi4_14b();
    let deviation = (preset.rope_theta - 250000.0).abs();
    assert!(
        deviation < 1.0,
        "phi4_14b rope_theta must be 250000, got {}",
        preset.rope_theta
    );
}
/// The mini preset must report `rope_theta` within 1.0 of 250000.
#[test]
fn test_phi4_mini_rope_theta() {
    let preset = Phi4Config::phi4_mini();
    let deviation = (preset.rope_theta - 250000.0).abs();
    assert!(
        deviation < 1.0,
        "phi4_mini rope_theta must be 250000, got {}",
        preset.rope_theta
    );
}
/// The mini preset must report a per-head dimension of 96.
#[test]
fn test_phi4_mini_head_dim() {
    let head_dim = Phi4Config::phi4_mini().head_dim;
    assert_eq!(head_dim, 96, "phi4_mini head_dim must be 96");
}
/// `gqa_ratio()` on the mini preset must be 4, and must agree with dividing the
/// attention-head count by the key/value-head count directly.
#[test]
fn test_phi4_mini_gqa_ratio() {
    let preset = Phi4Config::phi4_mini();
    assert_eq!(preset.gqa_ratio(), 4, "phi4_mini gqa_ratio must be 4");

    let heads_per_kv_head = preset.num_attention_heads / preset.num_key_value_heads;
    assert_eq!(heads_per_kv_head, 4);
}
/// Checks the 14B preset's `gqa_ratio()` together with the two head counts it relates
/// (40 attention heads, 10 key/value heads).
#[test]
fn test_phi4_14b_gqa_ratio_explicit() {
    let preset = Phi4Config::phi4_14b();

    assert_eq!(preset.gqa_ratio(), 4);
    assert_eq!(preset.num_attention_heads, 40);
    assert_eq!(preset.num_key_value_heads, 10);
}
/// The LongRoPE preset's `short_factor` list must have exactly 32 entries.
#[test]
fn test_phi4_longrope_short_factor_length() {
    let scaling = Phi4Config::phi4_14b_longrope()
        .rope_scaling
        .expect("rope_scaling must be Some");

    let entries = scaling.short_factor.len();
    assert_eq!(entries, 32, "short_factor must have 32 entries");
}
/// The LongRoPE preset's `long_factor` list must have exactly 32 entries.
#[test]
fn test_phi4_longrope_long_factor_length() {
    let scaling = Phi4Config::phi4_14b_longrope()
        .rope_scaling
        .expect("rope_scaling must be Some");

    let entries = scaling.long_factor.len();
    assert_eq!(entries, 32, "long_factor must have 32 entries");
}
/// The LongRoPE scaling block must record 4096 as `original_max_position_embeddings`.
#[test]
fn test_phi4_longrope_original_max_position() {
    let scaling = Phi4Config::phi4_14b_longrope()
        .rope_scaling
        .expect("rope_scaling must be Some");

    assert_eq!(scaling.original_max_position_embeddings, 4096);
}
/// The LongRoPE preset must report `max_position_embeddings` of 131072.
#[test]
fn test_phi4_longrope_max_position() {
    let max_positions = Phi4Config::phi4_14b_longrope().max_position_embeddings;
    assert_eq!(max_positions, 131072);
}
/// The LongRoPE scaling block must carry a `long_mscale` strictly greater than 1.0.
#[test]
fn test_phi4_longrope_long_mscale_gt1() {
    let scaling = Phi4Config::phi4_14b_longrope()
        .rope_scaling
        .expect("rope_scaling must be Some");

    let exceeds_one = scaling.long_mscale > 1.0;
    assert!(
        exceeds_one,
        "long_mscale must be > 1.0, got {}",
        scaling.long_mscale
    );
}
/// The LongRoPE scaling block must carry a `short_mscale` of 1.0 (to within 1e-6).
#[test]
fn test_phi4_longrope_short_mscale() {
    let scaling = Phi4Config::phi4_14b_longrope()
        .rope_scaling
        .expect("rope_scaling must be Some");

    let deviation = (scaling.short_mscale - 1.0).abs();
    assert!(deviation < 1e-6, "short_mscale must be 1.0");
}
/// `validate()` must reject the 14B preset once `num_attention_heads` is set to 41,
/// a value that does not evenly divide the preset's hidden size of 5120.
#[test]
fn test_phi4_validate_fails_hidden_not_divisible_by_heads() {
    // NOTE(review): 41 is also not a multiple of the preset's 10 key/value heads, so a
    // KV-divisibility check could be what rejects this config — confirm against `validate`.
    let cfg = Phi4Config {
        num_attention_heads: 41,
        ..Phi4Config::phi4_14b()
    };

    let outcome = cfg.validate();
    assert!(
        outcome.is_err(),
        "hidden_size not divisible by num_attention_heads must fail"
    );
}
/// `validate()` must reject a configuration whose `vocab_size` is zero.
#[test]
fn test_phi4_validate_fails_vocab_zero() {
    let cfg = Phi4Config {
        vocab_size: 0,
        ..Phi4Config::phi4_14b()
    };

    let outcome = cfg.validate();
    assert!(outcome.is_err(), "vocab_size=0 should fail");
}
/// `validate()` must reject a configuration whose `num_attention_heads` is zero.
#[test]
fn test_phi4_validate_fails_heads_zero() {
    let cfg = Phi4Config {
        num_attention_heads: 0,
        ..small_phi4_config()
    };

    let outcome = cfg.validate();
    assert!(outcome.is_err(), "num_attention_heads=0 should fail");
}
/// `validate()` must reject a configuration whose `head_dim` is zero.
#[test]
fn test_phi4_validate_fails_head_dim_zero() {
    let cfg = Phi4Config {
        head_dim: 0,
        ..small_phi4_config()
    };

    let outcome = cfg.validate();
    assert!(outcome.is_err(), "head_dim=0 should fail");
}
/// `validate()` must reject a configuration whose `num_hidden_layers` is zero.
#[test]
fn test_phi4_validate_fails_layers_zero() {
    let cfg = Phi4Config {
        num_hidden_layers: 0,
        ..small_phi4_config()
    };

    let outcome = cfg.validate();
    assert!(outcome.is_err(), "num_hidden_layers=0 should fail");
}
/// `validate()` must reject a configuration whose `rms_norm_eps` is zero.
#[test]
fn test_phi4_validate_fails_rms_norm_eps_zero() {
    let cfg = Phi4Config {
        rms_norm_eps: 0.0,
        ..small_phi4_config()
    };

    let outcome = cfg.validate();
    assert!(outcome.is_err(), "rms_norm_eps=0 should fail");
}
/// `validate()` must reject a configuration whose `rope_theta` is zero.
#[test]
fn test_phi4_validate_fails_rope_theta_zero() {
    let cfg = Phi4Config {
        rope_theta: 0.0,
        ..small_phi4_config()
    };

    let outcome = cfg.validate();
    assert!(outcome.is_err(), "rope_theta=0 should fail");
}
/// The rendered text of `Phi4Error::EmptyInput` must mention "empty" (any letter case).
#[test]
fn test_phi4_error_display_empty_input() {
    let s = Phi4Error::EmptyInput.to_string();
    let mentions_empty = s.to_lowercase().contains("empty");
    assert!(
        mentions_empty,
        "EmptyInput display should mention empty, got: {s}"
    );
}
/// The rendered text of `Phi4Error::InvalidConfig` must include the wrapped message.
#[test]
fn test_phi4_error_display_invalid_config() {
    let detail = "bad config value";
    let s = Phi4Error::InvalidConfig(detail.to_string()).to_string();
    assert!(s.contains(detail), "should include message, got: {s}");
}
/// The rendered text of `Phi4Error::SequenceTooLong` must include both the `max` and
/// the `got` values.
#[test]
fn test_phi4_error_display_sequence_too_long() {
    let s = Phi4Error::SequenceTooLong {
        max: 16384,
        got: 32768,
    }
    .to_string();

    let shows_max = s.contains("16384");
    assert!(shows_max, "should contain max, got: {s}");
    let shows_got = s.contains("32768");
    assert!(shows_got, "should contain got, got: {s}");
}
/// The rendered text of `Phi4Error::ShapeMismatch` must include the differing dimension
/// from both the `expected` and the `got` shapes.
#[test]
fn test_phi4_error_display_shape_mismatch() {
    let s = Phi4Error::ShapeMismatch {
        expected: vec![1, 128],
        got: vec![1, 64],
    }
    .to_string();

    let shows_expected = s.contains("128");
    assert!(shows_expected, "should contain expected dim, got: {s}");
    let shows_got = s.contains("64");
    assert!(shows_got, "should contain got dim, got: {s}");
}
/// Cloning a LongRoPE configuration must keep the scaling block, and the clone must
/// agree with the source on `tie_word_embeddings`, `vocab_size`, and the scaling
/// block's `rope_type`.
#[test]
fn test_phi4_config_clone() {
    let source = Phi4Config::phi4_14b_longrope();
    let cloned = source.clone();

    assert!(
        cloned.rope_scaling.is_some(),
        "clone must preserve rope_scaling"
    );
    assert_eq!(cloned.tie_word_embeddings, source.tie_word_embeddings);
    assert_eq!(cloned.vocab_size, source.vocab_size);

    // Compare the rope type through borrowed views of each scaling block.
    let cloned_rope_type = cloned.rope_scaling.as_ref().map(|r| r.rope_type.as_str());
    let source_rope_type = source.rope_scaling.as_ref().map(|r| r.rope_type.as_str());
    assert_eq!(cloned_rope_type, source_rope_type);
}
/// The `Debug` rendering of a configuration must name the type and its `vocab_size` field.
#[test]
fn test_phi4_config_debug() {
    let s = format!("{:?}", Phi4Config::phi4_14b());

    let names_type = s.contains("Phi4Config");
    assert!(names_type, "debug must contain type name, got: {s}");

    let names_field = s.contains("vocab_size");
    assert!(names_field, "debug must contain vocab_size, got: {s}");
}
/// `architecture()` on the 14B preset must return `"Phi-4"`.
#[test]
fn test_phi4_architecture_string() {
    // Presumably brings the trait that provides `architecture()` into scope — confirm.
    use trustformers_core::traits::Config;

    let preset = Phi4Config::phi4_14b();
    let architecture = preset.architecture();
    assert_eq!(architecture, "Phi-4");
}
/// Constructing `Phi4ForCausalLM` from the small test configuration must succeed.
#[test]
fn test_phi4_causal_lm_new_small() {
    let built = Phi4ForCausalLM::new(small_phi4_config());
    assert!(built.is_ok(), "new() with valid small config must succeed");
}
/// Greedy generation from a two-token prompt with a budget of 5 must either return
/// exactly 5 tokens or fail with an error whose text contains one of the tolerated
/// substrings ("forward", "Linear", "dimension").
#[test]
fn test_phi4_generate_greedy_token_count() {
    let model = Phi4ForCausalLM::new(small_phi4_config()).expect("new");

    let generated = match model.generate_greedy(&[1u32, 2], 5) {
        Ok(tokens) => tokens,
        Err(e) => {
            // A failure is accepted only when its message matches a tolerated substring.
            let msg = e.to_string();
            let tolerated = ["forward", "Linear", "dimension"]
                .iter()
                .any(|needle| msg.contains(*needle));
            assert!(tolerated, "unexpected error variant: {msg}");
            return;
        }
    };

    assert_eq!(
        generated.len(),
        5,
        "generate_greedy must return max_new_tokens tokens"
    );
}
/// Greedy generation from an empty prompt must return an error whose text mentions
/// "empty" (any letter case).
#[test]
fn test_phi4_generate_greedy_empty_input_error() {
    let model = Phi4ForCausalLM::new(small_phi4_config()).expect("new");

    let outcome = model.generate_greedy(&[], 3);
    assert!(outcome.is_err(), "empty input must return Err");

    let err_str = outcome.unwrap_err().to_string();
    let mentions_empty = err_str.to_lowercase().contains("empty");
    assert!(
        mentions_empty,
        "error must mention 'empty', got: {err_str}"
    );
}
/// Both the 14B and mini presets must report 4096 `original_max_position_embeddings`.
#[test]
fn test_phi4_original_max_position_embeddings() {
    let from_14b = Phi4Config::phi4_14b().original_max_position_embeddings;
    assert_eq!(from_14b, 4096);

    let from_mini = Phi4Config::phi4_mini().original_max_position_embeddings;
    assert_eq!(from_mini, 4096);
}
/// The 14B preset must report 10 key/value heads.
#[test]
fn test_phi4_14b_kv_heads() {
    let kv_heads = Phi4Config::phi4_14b().num_key_value_heads;
    assert_eq!(kv_heads, 10);
}
/// The mini preset must report 8 key/value heads.
#[test]
fn test_phi4_mini_kv_heads() {
    let kv_heads = Phi4Config::phi4_mini().num_key_value_heads;
    assert_eq!(kv_heads, 8);
}
/// The 14B preset must report an `intermediate_size` of 17920.
#[test]
fn test_phi4_14b_intermediate_size() {
    let intermediate_size = Phi4Config::phi4_14b().intermediate_size;
    assert_eq!(intermediate_size, 17920);
}
/// The rendered text of `Phi4Error::ForwardError` must include the wrapped message.
#[test]
fn test_phi4_error_display_forward_error() {
    let detail = "some forward issue";
    let s = Phi4Error::ForwardError(detail.to_string()).to_string();
    assert!(
        s.contains(detail),
        "ForwardError should include message, got: {s}"
    );
}
}