use crate::internlm2::config::InternLm2Config;
use crate::internlm2::model::{InternLm2Error, InternLm2Model};
/// InternLM2 backbone plus a linear output head that maps hidden states to
/// vocabulary logits (causal language modeling).
pub struct InternLm2ForCausalLM {
// Transformer backbone; its `forward` yields a flat `[seq_len * hidden_size]` buffer.
pub model: InternLm2Model,
// Output projection, row-major `[vocab_size, hidden_size]`:
// row `i` holds the weights for vocabulary id `i`.
pub lm_head_weight: Vec<f32>,
}
impl InternLm2ForCausalLM {
    /// Builds the causal-LM head on top of a fresh [`InternLm2Model`].
    ///
    /// The output projection `lm_head_weight` is allocated as a row-major
    /// `[vocab_size, hidden_size]` matrix and zero-initialized; real weights
    /// are expected to be loaded into it separately.
    pub fn new(config: InternLm2Config) -> Self {
        let vocab = config.vocab_size;
        let hidden = config.hidden_size;
        // Read the sizes first: `InternLm2Model::new` takes the config by value.
        let model = InternLm2Model::new(config);
        Self {
            model,
            lm_head_weight: vec![0.0_f32; vocab * hidden],
        }
    }

    /// Runs the backbone and projects each position's hidden state onto the
    /// vocabulary.
    ///
    /// Returns a flat `[seq_len * vocab_size]` buffer laid out row-major:
    /// `logits[pos * vocab_size + token_id]`.
    ///
    /// # Errors
    /// Propagates any [`InternLm2Error`] raised by the backbone forward pass
    /// (e.g. empty input or an out-of-vocabulary id).
    pub fn forward(&self, input_ids: &[u32]) -> Result<Vec<f32>, InternLm2Error> {
        let hidden = self.model.forward(input_ids)?;
        let h = self.model.config.hidden_size;
        let v = self.model.config.vocab_size;
        let mut logits = Vec::with_capacity(input_ids.len() * v);
        // `chunks_exact` walks both flat buffers one row at a time; unlike the
        // indexed form it lets the optimizer elide per-element bounds checks.
        for h_slice in hidden.chunks_exact(h) {
            logits.extend(
                self.lm_head_weight
                    .chunks_exact(h)
                    .map(|row| Self::dot(h_slice, row)),
            );
        }
        Ok(logits)
    }

    /// Dot product of two equal-length `f32` slices.
    fn dot(a: &[f32], b: &[f32]) -> f32 {
        a.iter().zip(b).map(|(x, w)| x * w).sum()
    }

    /// Greedy (argmax) decoding: appends `max_new_tokens` tokens to a copy of
    /// `input_ids` and returns only the newly generated tokens.
    ///
    /// Only the final position's logits are materialized each step: the argmax
    /// depends solely on the last hidden state, so projecting logits for every
    /// earlier position (as `forward` does) would be dead work. NOTE: the
    /// backbone is still re-run over the whole growing sequence each step;
    /// avoiding that would require a KV cache in [`InternLm2Model`].
    ///
    /// # Errors
    /// Returns [`InternLm2Error::InvalidInput`] for an empty prompt, and
    /// propagates backbone errors from each forward pass.
    pub fn generate(
        &self,
        input_ids: &[u32],
        max_new_tokens: usize,
    ) -> Result<Vec<u32>, InternLm2Error> {
        if input_ids.is_empty() {
            return Err(InternLm2Error::InvalidInput(
                "input_ids must not be empty for generation".to_string(),
            ));
        }
        let h = self.model.config.hidden_size;
        let mut ids: Vec<u32> = input_ids.to_vec();
        for _ in 0..max_new_tokens {
            let hidden = self.model.forward(&ids)?;
            // Hidden states are flat [seq_len * hidden_size]; the last `h`
            // values belong to the final position.
            let last_hidden = &hidden[hidden.len() - h..];
            // Argmax over the vocabulary. Ties resolve to the highest index and
            // NaN pairs compare as Equal — identical to running `max_by` over a
            // fully materialized logits row, as the original code did.
            let next_token = self
                .lm_head_weight
                .chunks_exact(h)
                .map(|row| Self::dot(last_hidden, row))
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
                .map(|(idx, _)| idx as u32)
                .ok_or_else(|| InternLm2Error::ForwardError("empty logits".to_string()))?;
            ids.push(next_token);
        }
        Ok(ids[input_ids.len()..].to_vec())
    }

    /// Formats one system + user exchange in the InternLM2 ChatML-style
    /// template, ending with an open `assistant` turn for the model to
    /// complete.
    pub fn format_chat_prompt(system: &str, user: &str) -> String {
        format!(
            "<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"
        )
    }
}
// Unit tests covering config defaults, RoPE (with and without NTK scaling),
// RMSNorm, attention shapes and GQA head mapping, the SwiGLU MLP, decoder
// layers, full-model forward, causal-LM logits, greedy generation, chat
// prompt formatting, and error paths.
#[cfg(test)]
mod tests {
use super::*;
use crate::internlm2::{
config::InternLm2Config,
model::{
InternLm2Attention, InternLm2Error, InternLm2MLP, InternLm2RmsNorm,
InternLm2RotaryEmbedding,
},
};
// Small config shared by most tests. Judging from the assertions in
// `test_internlm2_config_custom`, the positional arguments are presumably
// (vocab=128, hidden=64, layers=2, heads=4, kv_heads=2, intermediate=128,
// max_pos=512, rope_theta, rope_scaling, act, eps, tie_embeddings,
// use_cache) — TODO confirm against `InternLm2Config::new`'s signature.
fn tiny_config() -> InternLm2Config {
InternLm2Config::new(
128, 64, 2, 4, 2, 128, 512, 10000.0, None, "silu", 1e-5, false, true,
)
}
// Default config values; these look like a 7B-class checkpoint's
// hyperparameters — not verifiable from this file alone.
#[test]
fn test_internlm2_config_default() {
let cfg = InternLm2Config::default();
assert_eq!(cfg.vocab_size, 92544);
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.num_hidden_layers, 32);
assert_eq!(cfg.num_attention_heads, 32);
assert_eq!(cfg.num_key_value_heads, 8);
assert_eq!(cfg.intermediate_size, 14336);
assert_eq!(cfg.max_position_embeddings, 32768);
assert!((cfg.rope_theta - 1_000_000.0).abs() < 1.0);
assert!(cfg.rope_scaling.is_none());
assert_eq!(cfg.hidden_act, "silu");
assert!((cfg.rms_norm_eps - 1e-5).abs() < 1e-10);
assert!(!cfg.tie_word_embeddings);
assert!(cfg.use_cache);
}
// Derived quantities: gqa_ratio = 4 heads / 2 kv heads = 2,
// head_dim = 64 hidden / 4 heads = 16.
#[test]
fn test_internlm2_config_custom() {
let cfg = tiny_config();
assert_eq!(cfg.vocab_size, 128);
assert_eq!(cfg.hidden_size, 64);
assert_eq!(cfg.num_key_value_heads, 2);
assert_eq!(cfg.gqa_ratio(), 2);
assert_eq!(cfg.head_dim(), 16);
}
// RoPE must preserve buffer lengths and actually rotate (change) values.
#[test]
fn test_internlm2_rope_basic() {
let rope = InternLm2RotaryEmbedding::new(10000.0, None);
let seq_len = 4;
let head_dim = 8;
let q: Vec<f32> = (0..seq_len * head_dim).map(|i| i as f32 * 0.1).collect();
let k = q.clone();
let (q_rot, k_rot) = rope.apply(&q, &k, seq_len, head_dim);
assert_eq!(q_rot.len(), q.len());
assert_eq!(k_rot.len(), k.len());
let changed = q.iter().zip(q_rot.iter()).any(|(a, b)| (a - b).abs() > 1e-6);
assert!(changed, "RoPE should modify at least some values");
}
// A scaling factor (Some(2.0), presumably NTK-aware scaling) must produce
// rotations different from the unscaled baseline.
#[test]
fn test_internlm2_rope_ntk_scaling() {
let rope_base = InternLm2RotaryEmbedding::new(10000.0, None);
let rope_ntk = InternLm2RotaryEmbedding::new(10000.0, Some(2.0));
let seq_len = 4;
let head_dim = 8;
let q: Vec<f32> = (0..seq_len * head_dim).map(|i| i as f32 * 0.1).collect();
let k = q.clone();
let (q_base, _) = rope_base.apply(&q, &k, seq_len, head_dim);
let (q_ntk, _) = rope_ntk.apply(&q, &k, seq_len, head_dim);
let differs = q_base.iter().zip(q_ntk.iter()).any(|(a, b)| (a - b).abs() > 1e-6);
assert!(differs, "NTK scaling should change the rotations");
}
// With unit weights, RMSNorm output should itself have RMS ~= 1.
#[test]
fn test_internlm2_rms_norm() {
let x = vec![1.0_f32, 2.0, 3.0, 4.0];
let weight = vec![1.0_f32; 4];
let out = InternLm2RmsNorm::forward(&x, &weight, 1e-5);
assert_eq!(out.len(), 4);
let rms_out: f32 = (out.iter().map(|v| v * v).sum::<f32>() / 4.0).sqrt();
assert!(
(rms_out - 1.0).abs() < 0.01,
"RMS of normed output should be ~1, got {rms_out}"
);
}
// Attention is shape-preserving: [seq_len * hidden] in and out.
#[test]
fn test_internlm2_attention_shape() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let seq_len = 4;
let attn = InternLm2Attention::new(cfg, 0);
let input = vec![0.5_f32; seq_len * h];
let out = attn.forward(&input, seq_len);
assert_eq!(
out.len(),
seq_len * h,
"Attention output must have shape [seq_len * hidden_size]"
);
}
// GQA with 4 query heads over 2 KV heads: query heads pair up
// (0,1) -> kv 0 and (2,3) -> kv 1.
#[test]
fn test_internlm2_gqa_head_mapping() {
let cfg = tiny_config(); let attn = InternLm2Attention::new(cfg, 0);
assert_eq!(attn.kv_head_for_q(0), 0);
assert_eq!(attn.kv_head_for_q(1), 0);
assert_eq!(attn.kv_head_for_q(2), 1);
assert_eq!(attn.kv_head_for_q(3), 1);
}
// MLP maps a single hidden vector back to hidden_size.
#[test]
fn test_internlm2_mlp_swiglu() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let mlp = InternLm2MLP::new(&cfg);
let input = vec![1.0_f32; h];
let out = mlp.forward(&input);
assert_eq!(out.len(), h, "MLP output must match hidden_size");
}
// A full decoder layer is also shape-preserving.
#[test]
fn test_internlm2_decoder_layer() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let seq_len = 4;
use crate::internlm2::model::InternLm2DecoderLayer;
let layer = InternLm2DecoderLayer::new(cfg, 0);
let input = vec![0.1_f32; seq_len * h];
let out = layer.forward(&input, seq_len);
assert_eq!(out.len(), seq_len * h);
}
// Model forward returns one hidden vector per input token.
#[test]
fn test_internlm2_model_forward() {
let cfg = tiny_config();
let h = cfg.hidden_size;
let model = InternLm2Model::new(cfg);
let input_ids = vec![1_u32, 2, 3, 4];
let out = model.forward(&input_ids).expect("forward should succeed");
assert_eq!(out.len(), 4 * h);
}
// LM head produces one logits row per position: [seq_len * vocab_size].
#[test]
fn test_internlm2_causal_lm_forward() {
let cfg = tiny_config();
let v = cfg.vocab_size;
let lm = InternLm2ForCausalLM::new(cfg);
let input_ids = vec![1_u32, 2, 3];
let logits = lm.forward(&input_ids).expect("forward should succeed");
assert_eq!(
logits.len(),
3 * v,
"logits shape must be [seq_len * vocab_size]"
);
}
// Greedy generation returns exactly max_new_tokens in-vocabulary tokens
// (vocab_size is 128 in tiny_config).
#[test]
fn test_internlm2_generate_greedy() {
let cfg = tiny_config();
let lm = InternLm2ForCausalLM::new(cfg);
let input_ids = vec![1_u32, 2];
let generated = lm.generate(&input_ids, 3).expect("generate should succeed");
assert_eq!(
generated.len(),
3,
"generate should return max_new_tokens new tokens"
);
for tok in &generated {
assert!(*tok < 128, "generated tokens must be within vocab");
}
}
// The ChatML-style template must contain all role markers and both texts.
#[test]
fn test_internlm2_chat_format() {
let prompt = InternLm2ForCausalLM::format_chat_prompt("You are helpful.", "Hello!");
assert!(prompt.contains("<|im_start|>system"));
assert!(prompt.contains("You are helpful."));
assert!(prompt.contains("<|im_end|>"));
assert!(prompt.contains("<|im_start|>user"));
assert!(prompt.contains("Hello!"));
assert!(prompt.contains("<|im_start|>assistant"));
}
// Construction builds exactly num_hidden_layers decoder layers.
#[test]
fn test_internlm2_model_new() {
let cfg = tiny_config();
let num_layers = cfg.num_hidden_layers;
let model = InternLm2Model::new(cfg);
assert_eq!(model.layers.len(), num_layers);
}
// Both error variants display their payload and box as std errors.
#[test]
fn test_internlm2_error_display() {
let err1 = InternLm2Error::InvalidInput("bad token".to_string());
let err2 = InternLm2Error::ForwardError("NaN detected".to_string());
assert!(err1.to_string().contains("bad token"));
assert!(err2.to_string().contains("NaN detected"));
let _boxed: Box<dyn std::error::Error> = Box::new(InternLm2Error::InvalidInput("x".into()));
}
// Vocabulary boundary: id 127 (last valid for vocab_size 128) succeeds,
// id 128 must be rejected.
#[test]
fn test_internlm2_large_vocab() {
let cfg = tiny_config();
let model = InternLm2Model::new(cfg);
let ok = model.forward(&[127_u32]);
assert!(ok.is_ok(), "token at vocab boundary should succeed");
let err = model.forward(&[128_u32]);
assert!(
err.is_err(),
"token out of vocabulary should return an error"
);
}
// The backbone rejects an empty token sequence.
#[test]
fn test_internlm2_empty_input_error() {
let cfg = tiny_config();
let model = InternLm2Model::new(cfg);
let err = model.forward(&[]);
assert!(err.is_err(), "empty input should return an error");
}
// RMSNorm with unit weights preserves the sign of each element.
#[test]
fn test_internlm2_rms_norm_identity_weight() {
let x = vec![2.0_f32, 0.0, -2.0, 0.0];
let weight = vec![1.0_f32; 4];
let out = InternLm2RmsNorm::forward(&x, &weight, 1e-8);
assert!(out[0] > 0.0);
assert!(out[2] < 0.0);
}
}