//! InternLM2: configuration presets, core model components (attention, MLP,
//! RMSNorm, rotary embeddings), and task heads such as the causal LM.

pub mod config;
pub mod model;
pub mod tasks;
pub use config::*;
pub use model::*;
pub use tasks::*;
#[cfg(test)]
mod tests {
use crate::internlm2::{
config::InternLm2Config,
model::{
InternLm2Attention, InternLm2DecoderLayer, InternLm2Error, InternLm2MLP,
InternLm2Model, InternLm2RmsNorm, InternLm2RotaryEmbedding,
},
tasks::InternLm2ForCausalLM,
};
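
    /// Deliberately small configuration so the tests run fast: 4 query heads
    /// over 2 KV heads gives gqa_ratio() == 2, and hidden_size 64 across 4
    /// heads gives head_dim() == 16. The per-argument comments below are
    /// positional inferences from the field order asserted in
    /// `test_config_default`.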
    fn tiny_config() -> InternLm2Config {
        InternLm2Config::new(
            128,     // vocab_size
            64,      // hidden_size
            2,       // num_hidden_layers
            4,       // num_attention_heads
            2,       // num_key_value_heads
            128,     // intermediate_size
            512,     // max_position_embeddings
            10000.0, // rope_theta
            None,    // rope_scaling
            "silu",  // hidden_act
            1e-5,    // rms_norm_eps
            false,   // tie_word_embeddings
            true,    // use_cache
        )
    }
#[test]
fn test_config_default() {
let cfg = InternLm2Config::default();
assert_eq!(cfg.vocab_size, 92544);
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.num_hidden_layers, 32);
assert_eq!(cfg.num_attention_heads, 32);
assert_eq!(cfg.num_key_value_heads, 8);
assert_eq!(cfg.intermediate_size, 14336);
assert_eq!(cfg.max_position_embeddings, 32768);
assert!((cfg.rope_theta - 1_000_000.0).abs() < 1.0);
assert!(cfg.rope_scaling.is_none());
assert_eq!(cfg.hidden_act, "silu");
assert!(!cfg.tie_word_embeddings);
assert!(cfg.use_cache);
}
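
    /// For the 7B preset the computed properties follow directly from the
    /// asserted fields: gqa_ratio = 32 query heads / 8 KV heads = 4, and
    /// head_dim = 4096 hidden / 32 heads = 128.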
#[test]
fn test_internlm2_7b_preset() {
let cfg = InternLm2Config::internlm2_7b();
assert_eq!(cfg.vocab_size, 92544);
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.num_hidden_layers, 32);
assert_eq!(cfg.num_attention_heads, 32);
assert_eq!(cfg.num_key_value_heads, 8);
assert_eq!(cfg.gqa_ratio(), 4);
assert_eq!(cfg.head_dim(), 128);
}
#[test]
fn test_internlm2_20b_preset() {
let cfg = InternLm2Config::internlm2_20b();
assert_eq!(cfg.hidden_size, 6144);
assert_eq!(cfg.num_hidden_layers, 48);
assert_eq!(cfg.num_attention_heads, 48);
assert_eq!(cfg.num_key_value_heads, 8);
assert_eq!(cfg.intermediate_size, 16384);
assert_eq!(cfg.gqa_ratio(), 6);
assert_eq!(cfg.head_dim(), 128);
assert_eq!(cfg.vocab_size, 92544);
}
#[test]
fn test_config_computed_properties() {
let cfg = tiny_config();
assert_eq!(cfg.head_dim(), 16);
assert_eq!(cfg.gqa_ratio(), 2);
}
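
    /// RoPE rotates pairs of head dimensions by a position-dependent angle,
    /// conventionally theta_j = pos * base^(-2j / head_dim). At pos = 0 every
    /// angle is 0 and values pass through unchanged, which is why the
    /// assertion below only requires *some* element to change (the rows for
    /// pos >= 1). The exact dimension-pairing scheme is an implementation
    /// detail of `InternLm2RotaryEmbedding`; this test only checks that the
    /// rotation is not a no-op.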
#[test]
fn test_rope_rotation_changes_values() {
let rope = InternLm2RotaryEmbedding::new(10000.0, None);
let head_dim = 8;
let seq_len = 4;
let q: Vec<f32> = (0..seq_len * head_dim).map(|i| i as f32 * 0.1).collect();
let k = q.clone();
let (q_rot, k_rot) = rope.apply(&q, &k, seq_len, head_dim);
assert_eq!(q_rot.len(), q.len());
assert_eq!(k_rot.len(), k.len());
let changed = q.iter().zip(q_rot.iter()).any(|(a, b)| (a - b).abs() > 1e-6);
assert!(changed, "RoPE should modify values for non-zero positions");
}
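
    /// RMSNorm computes out_i = w_i * x_i / sqrt(mean(x^2) + eps). Worked
    /// through for x = [1, 2, 3, 4]: mean(x^2) = (1 + 4 + 9 + 16) / 4 = 7.5,
    /// so each element is divided by sqrt(7.5) ≈ 2.739 and the RMS of the
    /// output is 1 by construction (with unit weights and negligible eps).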
#[test]
fn test_rmsnorm_unit_rms_output() {
let x = vec![1.0f32, 2.0, 3.0, 4.0];
let w = vec![1.0f32; 4];
let out = InternLm2RmsNorm::forward(&x, &w, 1e-5);
assert_eq!(out.len(), 4);
let rms: f32 = (out.iter().map(|v| v * v).sum::<f32>() / 4.0).sqrt();
assert!(
(rms - 1.0).abs() < 0.01,
"RMS of normed output ≈ 1, got {rms}"
);
}
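
    /// Activations are flat [seq_len * hidden_size] buffers throughout these
    /// tests (presumably position-major). Attention projects to Q/K/V, mixes
    /// information across positions, and projects back, so its output must
    /// keep the input shape.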
#[test]
fn test_attention_output_shape() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let seq_len = 5;
let attn = InternLm2Attention::new(cfg, 0);
let input = vec![0.5f32; seq_len * h];
let out = attn.forward(&input, seq_len);
assert_eq!(
out.len(),
seq_len * h,
"attention output must have shape [seq_len * hidden_size]"
);
}
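
    /// Grouped-query attention shares each KV head among consecutive query
    /// heads: kv_head = q_head / gqa_ratio (integer division). With the tiny
    /// config's ratio of 2, query heads {0, 1} use KV head 0 and {2, 3} use
    /// KV head 1, exactly what the assertions below spell out.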
#[test]
fn test_gqa_head_mapping() {
        let cfg = tiny_config();
        let attn = InternLm2Attention::new(cfg, 0);
assert_eq!(attn.kv_head_for_q(0), 0);
assert_eq!(attn.kv_head_for_q(1), 0);
assert_eq!(attn.kv_head_for_q(2), 1);
assert_eq!(attn.kv_head_for_q(3), 1);
}
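
    /// SwiGLU MLP: out = down(silu(gate(x)) * up(x)), with elementwise
    /// multiplication; under the tiny config the shapes run hidden 64 ->
    /// intermediate 128 -> hidden 64. (InternLM2 checkpoints call these
    /// projections w1, w3, and w2, though the names here are internal to
    /// `InternLm2MLP`.)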
#[test]
fn test_mlp_swiglu_output_shape() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let mlp = InternLm2MLP::new(&cfg);
let input = vec![1.0f32; h];
let out = mlp.forward(&input);
assert_eq!(out.len(), h, "MLP output length must equal hidden_size");
}
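
    /// A decoder layer is presumably the usual pre-norm residual pair,
    /// x = x + attn(norm(x)) followed by x = x + mlp(norm(x)); both sublayers
    /// preserve shape, so the layer output matches the input.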
#[test]
fn test_decoder_layer_output_shape() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let seq_len = 3;
let layer = InternLm2DecoderLayer::new(cfg, 0);
let input = vec![0.2f32; seq_len * h];
let out = layer.forward(&input, seq_len);
assert_eq!(out.len(), seq_len * h);
}
#[test]
fn test_model_creation_layer_count() {
let cfg = tiny_config();
let num_layers = cfg.num_hidden_layers;
let model = InternLm2Model::new(cfg);
assert_eq!(model.layers.len(), num_layers);
}
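
    /// Full-model forward presumably embeds the token ids, runs all
    /// num_hidden_layers decoder layers, and applies the final norm; the
    /// returned hidden states stay [seq_len * hidden_size].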
#[test]
fn test_model_forward_output_shape() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let model = InternLm2Model::new(cfg);
let input_ids = vec![1u32, 2, 3];
let out = model.forward(&input_ids).expect("forward should succeed");
assert_eq!(out.len(), 3 * h, "output must be [seq_len * hidden_size]");
}
#[test]
fn test_causal_lm_logits_shape() {
let cfg = tiny_config();
let v = cfg.vocab_size;
let lm = InternLm2ForCausalLM::new(cfg);
let input_ids = vec![1u32, 2, 3, 4];
let logits = lm.forward(&input_ids).expect("forward should succeed");
assert_eq!(logits.len(), 4 * v, "logits must be [seq_len * vocab_size]");
}
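
    /// Generation is presumably greedy argmax decoding: pick the
    /// highest-logit token at the last position, append it, and rerun, for
    /// max_new_tokens steps. With untrained weights only the token count and
    /// the vocab range are worth asserting, not the token values.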
#[test]
fn test_causal_lm_greedy_generation() {
        let cfg = tiny_config();
        let vocab = cfg.vocab_size;
        let lm = InternLm2ForCausalLM::new(cfg);
let input_ids = vec![1u32, 2];
let generated = lm.generate(&input_ids, 4).expect("generate should succeed");
assert_eq!(
generated.len(),
4,
"should return exactly max_new_tokens tokens"
);
for tok in &generated {
assert!(
                (*tok as usize) < vocab,
                "all tokens must be within vocab range (vocab_size = {vocab})"
);
}
}
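
    /// ChatML layout exercised below (the trailing "assistant" header is left
    /// open so the model completes the reply):
    ///
    /// <|im_start|>system
    /// Be helpful.<|im_end|>
    /// <|im_start|>user
    /// What is 2+2?<|im_end|>
    /// <|im_start|>assistant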
#[test]
fn test_chatml_prompt_format() {
let prompt = InternLm2ForCausalLM::format_chat_prompt("Be helpful.", "What is 2+2?");
assert!(prompt.contains("<|im_start|>system\nBe helpful.<|im_end|>"));
assert!(prompt.contains("<|im_start|>user\nWhat is 2+2?<|im_end|>"));
assert!(prompt.ends_with("<|im_start|>assistant\n"));
}
#[test]
fn test_error_display_variants() {
let inv = InternLm2Error::InvalidInput("bad token".to_string());
let fwd = InternLm2Error::ForwardError("NaN".to_string());
assert!(inv.to_string().contains("bad token"));
assert!(fwd.to_string().contains("NaN"));
let _boxed: Box<dyn std::error::Error> = Box::new(InternLm2Error::InvalidInput("x".into()));
}
#[test]
fn test_empty_input_returns_error() {
let cfg = tiny_config();
let model = InternLm2Model::new(cfg);
        let result = model.forward(&[]);
        assert!(result.is_err(), "empty input must return an error");
}
#[test]
fn test_oov_token_returns_error() {
let cfg = tiny_config();
let model = InternLm2Model::new(cfg);
        let result = model.forward(&[128u32]);
        assert!(
            result.is_err(),
            "OOV token (128 == vocab_size) should return an error"
        );
assert!(
model.forward(&[127u32]).is_ok(),
"last valid token should succeed"
);
}
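
    /// NTK-aware RoPE scaling stretches the rotary base so longer contexts
    /// compress into the trained position range (one common formulation is
    /// base' = base * s^(d / (d - 2)) for scaling factor s). Whatever the
    /// exact formula behind Some(2.0), scaled and unscaled embeddings must
    /// rotate positions >= 1 differently, which is all this test asserts.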
#[test]
fn test_ntk_rope_scaling_differs_from_base() {
let base = InternLm2RotaryEmbedding::new(10000.0, None);
let ntk = InternLm2RotaryEmbedding::new(10000.0, Some(2.0));
let head_dim = 16;
let seq_len = 4;
let q: Vec<f32> = (0..seq_len * head_dim).map(|i| (i as f32) * 0.1).collect();
let k = q.clone();
let (q_base, _) = base.apply(&q, &k, seq_len, head_dim);
let (q_ntk, _) = ntk.apply(&q, &k, seq_len, head_dim);
let differs = q_base.iter().zip(q_ntk.iter()).any(|(a, b)| (a - b).abs() > 1e-6);
assert!(differs, "NTK scaling must produce different rotations");
}
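
    /// RMSNorm multiplies each element by the positive scalar
    /// 1 / sqrt(mean(x^2) + eps) and by the (here all-positive) weights, so
    /// signs are preserved and zeros stay zero.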
#[test]
fn test_rmsnorm_preserves_sign() {
let x = vec![3.0f32, -3.0, 0.0, 6.0];
let w = vec![1.0f32; 4];
let out = InternLm2RmsNorm::forward(&x, &w, 1e-8);
assert!(
out[0] > 0.0,
"positive inputs should remain positive after RMSNorm"
);
assert!(
out[1] < 0.0,
"negative inputs should remain negative after RMSNorm"
);
}
}