use crate::starcoder2::{
config::StarCoder2Config,
fim::{format_fim_prompt, parse_fim_output, FimTokens},
model::{
StarCoder2Attention, StarCoder2ForCausalLM, StarCoder2MLP, StarCoder2Model,
StarCoder2RmsNorm, StarCoder2RotaryEmbedding,
},
};
use trustformers_core::{tensor::Tensor, traits::Config, traits::Layer, traits::Model};
/// Checks the field values of the 3B preset and that it passes validation.
#[test]
fn test_config_3b_preset() {
    let preset = StarCoder2Config::starcoder2_3b();

    // Model dimensions.
    assert_eq!(preset.vocab_size, 49152);
    assert_eq!(preset.hidden_size, 3072);
    assert_eq!(preset.intermediate_size, 12288);
    assert_eq!(preset.num_hidden_layers, 30);

    // Attention layout and bias flag.
    assert_eq!(preset.num_attention_heads, 24);
    assert_eq!(preset.num_key_value_heads, 2);
    assert!(preset.use_bias);

    assert!(preset.validate().is_ok());
}
/// Checks the key field values of the 7B preset and that it passes validation.
#[test]
fn test_config_7b_preset() {
    let preset = StarCoder2Config::starcoder2_7b();
    // (vocab, hidden, layers, kv heads) compared in one shot.
    let observed = (
        preset.vocab_size,
        preset.hidden_size,
        preset.num_hidden_layers,
        preset.num_key_value_heads,
    );
    assert_eq!(observed, (49152, 4608, 32, 2));
    assert!(preset.validate().is_ok());
}
/// Checks the key field values of the 15B preset and that it passes validation.
#[test]
fn test_config_15b_preset() {
    let preset = StarCoder2Config::starcoder2_15b();
    // (vocab, hidden, layers, kv heads) compared in one shot.
    let observed = (
        preset.vocab_size,
        preset.hidden_size,
        preset.num_hidden_layers,
        preset.num_key_value_heads,
    );
    assert_eq!(observed, (49152, 6144, 40, 2));
    assert!(preset.validate().is_ok());
}
/// Every size preset (3B, 7B, 15B) must report exactly two key/value heads.
#[test]
fn test_config_num_kv_heads_is_two_for_all_sizes() {
    let presets = [
        StarCoder2Config::starcoder2_3b(),
        StarCoder2Config::starcoder2_7b(),
        StarCoder2Config::starcoder2_15b(),
    ];
    for preset in presets.iter() {
        assert_eq!(
            preset.num_key_value_heads, 2,
            "Expected 2 KV heads for all sizes"
        );
    }
}
/// Every size preset (3B, 7B, 15B) must have `use_bias` enabled.
#[test]
fn test_config_use_bias_true_for_all_presets() {
    let presets = [
        StarCoder2Config::starcoder2_3b(),
        StarCoder2Config::starcoder2_7b(),
        StarCoder2Config::starcoder2_15b(),
    ];
    assert!(
        presets.iter().all(|preset| preset.use_bias),
        "StarCoder2 should always have bias=true"
    );
}
/// Checks the derived `head_dim()` and `num_query_groups()` values of the 3B preset.
#[test]
fn test_config_head_dim_and_query_groups() {
    let preset = StarCoder2Config::starcoder2_3b();
    let derived = (preset.head_dim(), preset.num_query_groups());
    assert_eq!(derived.0, 128);
    assert_eq!(derived.1, 12);
}
/// The small test configuration must validate, use two KV heads, and enable bias.
#[test]
fn test_config_small_test() {
    let tiny = StarCoder2Config::small_test();
    let validation = tiny.validate();
    assert!(validation.is_ok());
    assert_eq!(tiny.num_key_value_heads, 2);
    assert!(tiny.use_bias);
}
/// A forward pass through a 4-element RMS norm must keep the `[4]` shape.
#[test]
fn test_rms_norm_forward() {
    let values = vec![1.0f32, 2.0, 3.0, 4.0];
    let layer = StarCoder2RmsNorm::new(4, 1e-5).expect("build norm");
    let sample = Tensor::from_vec(values, &[4]).expect("build tensor");
    let normalized = layer.forward(sample).expect("forward");
    assert_eq!(normalized.shape(), &[4]);
}
/// A rotary embedding built with head dimension 128 must report `half_dim() == 64`
/// and keep `head_dim` at 128.
#[test]
fn test_rotary_embedding_half_dim() {
    let rotary = StarCoder2RotaryEmbedding::new(128, 16384, 10000.0);
    let half = rotary.half_dim();
    assert_eq!(half, 64);
    assert_eq!(rotary.head_dim, 128);
}
/// Applying the rotary embedding must return query/key tensors whose shapes
/// match the inputs.
#[test]
fn test_rotary_embedding_apply() {
    let rotary = StarCoder2RotaryEmbedding::new(16, 64, 10000.0);
    let query = Tensor::from_vec(vec![0.1f32; 16], &[16]).expect("q");
    let key = Tensor::from_vec(vec![0.2f32; 16], &[16]).expect("k");

    let (rotated_query, rotated_key) =
        rotary.apply_rotary_emb(&query, &key, &[0, 1]).expect("apply");

    assert_eq!(rotated_query.shape(), query.shape());
    assert_eq!(rotated_key.shape(), key.shape());
}
/// The attention layer built from the small test config must report two KV
/// heads, plus head count and head dimension that agree with the config.
#[test]
fn test_attention_num_kv_heads() {
    let tiny = StarCoder2Config::small_test();
    let attention = StarCoder2Attention::new(&tiny).expect("build attn");
    assert_eq!(attention.num_kv_heads(), 2);
    assert_eq!(attention.num_heads(), tiny.num_attention_heads);
    assert_eq!(attention.head_dim(), tiny.head_dim());
}
/// The FIM prompt must wrap prefix and suffix in the prefix/suffix/middle markers.
#[test]
fn test_fim_format_prompt() {
    let expected = "<fim_prefix>prefix_code<fim_suffix>suffix_code<fim_middle>";
    let rendered = format_fim_prompt("prefix_code", "suffix_code");
    assert_eq!(rendered, expected);
}
/// Parsing a full FIM transcript must yield only the text between the middle
/// marker and the end-of-text marker.
#[test]
fn test_fim_parse_output() {
    let transcript = "<fim_prefix>x<fim_suffix>z<fim_middle>middle_part<|endoftext|>";
    let middle = parse_fim_output(transcript).expect("parse");
    assert_eq!(middle, "middle_part");
}
/// Without an end-of-text marker, everything after the middle marker is returned.
#[test]
fn test_fim_parse_no_eot() {
    let transcript = "<fim_middle>generated code here";
    let expected = Some(String::from("generated code here"));
    assert_eq!(parse_fim_output(transcript), expected);
}
/// The default FIM token set must use id 1 for the prefix and id 3 for the suffix.
#[test]
fn test_fim_tokens_default() {
    // Only the prefix and suffix ids are checked here.
    let FimTokens {
        prefix_id,
        suffix_id,
        ..
    } = FimTokens::default();
    assert_eq!(prefix_id, 1);
    assert_eq!(suffix_id, 3);
}
/// A three-token forward pass through the small test model must produce an
/// output with batch 1, sequence length 3, and the config's hidden size.
///
/// The hidden-size check matches the one in `test_model_single_token_forward`,
/// so both model forward tests verify the full output shape.
#[test]
fn test_model_forward_small() {
    let cfg = StarCoder2Config::small_test();
    // The constructor takes the config by value; clone it so the hidden size
    // is still available for the shape check below.
    let model = StarCoder2Model::new(cfg.clone()).expect("build model");
    let output = model.forward(vec![1u32, 2, 3]).expect("forward");
    let shape = output.shape().to_vec();
    assert_eq!(shape[0], 1, "batch");
    assert_eq!(shape[1], 3, "seq_len=3");
    assert_eq!(shape[2], cfg.hidden_size, "hidden size");
}
/// A two-token forward pass through the causal LM must produce logits whose
/// first three dimensions are batch 1, sequence length 2, and the vocab size.
#[test]
fn test_causal_lm_forward_small() {
    let tiny = StarCoder2Config::small_test();
    // Read the vocab size before the config is handed to the constructor.
    let vocab = tiny.vocab_size;
    let causal_lm = StarCoder2ForCausalLM::new(tiny).expect("build lm");
    let dims = causal_lm.forward(vec![1u32, 2]).expect("forward").shape().to_vec();
    assert_eq!(&dims[..3], &[1, 2, vocab]);
}
/// With bias enabled, the MLP's parameter count must exceed the weight-only
/// count computed from the config dimensions.
#[test]
fn test_mlp_parameter_count_includes_bias() {
    let tiny = StarCoder2Config::small_test();
    assert!(tiny.use_bias);

    let hidden = tiny.hidden_size;
    let inner = tiny.intermediate_size;
    // Weight-only baseline: any bias terms must push the real count above this.
    let weights_only = hidden * inner * 2 + inner * hidden;

    let block = StarCoder2MLP::new(&tiny).expect("build mlp");
    assert!(block.parameter_count() > weights_only);
}
/// The formatted prompt must place the markers in prefix → suffix → middle order.
#[test]
fn test_fim_format_psm_ordering() {
    let rendered = format_fim_prompt("AAA", "BBB");
    // Byte offset of a marker, panicking with `missing` when it is absent.
    let locate = |marker: &str, missing: &str| rendered.find(marker).expect(missing);

    let at_prefix = locate("<fim_prefix>", "no prefix marker");
    let at_suffix = locate("<fim_suffix>", "no suffix marker");
    let at_middle = locate("<fim_middle>", "no middle marker");

    assert!(at_prefix < at_suffix, "prefix must come before suffix");
    assert!(at_suffix < at_middle, "suffix must come before middle");
}
/// An empty prefix must still produce all three markers, with nothing between
/// the prefix and suffix markers.
#[test]
fn test_fim_format_empty_prefix() {
    let expected = "<fim_prefix><fim_suffix>some_suffix<fim_middle>";
    let rendered = format_fim_prompt("", "some_suffix");
    assert_eq!(rendered, expected);
}
/// When end-of-text directly follows the middle marker, the parsed middle is
/// an empty string, not `None`.
#[test]
fn test_fim_parse_empty_middle() {
    let transcript = "<fim_prefix>x<fim_suffix>y<fim_middle><|endoftext|>";
    let parsed = parse_fim_output(transcript);
    assert_eq!(parsed.as_deref(), Some(""));
}
/// Newlines inside the generated middle section must be preserved by parsing.
#[test]
fn test_fim_parse_middle_with_newlines() {
    let transcript = "<fim_middle>line1\nline2\nline3<|endoftext|>";
    let expected = Some(String::from("line1\nline2\nline3"));
    assert_eq!(parse_fim_output(transcript), expected);
}
/// The default FIM token ids must be prefix=1, middle=2, suffix=3, pad=4, and
/// all four must be distinct.
#[test]
fn test_fim_tokens_all_fields() {
    let defaults = FimTokens::default();
    assert_eq!(defaults.prefix_id, 1);
    assert_eq!(defaults.middle_id, 2);
    assert_eq!(defaults.suffix_id, 3);
    assert_eq!(defaults.pad_id, 4);

    // Count distinct ids by sorting and dropping adjacent duplicates.
    let mut distinct: Vec<u32> = vec![
        defaults.prefix_id,
        defaults.middle_id,
        defaults.suffix_id,
        defaults.pad_id,
    ];
    distinct.sort_unstable();
    distinct.dedup();
    assert_eq!(distinct.len(), 4, "all FIM token IDs must be distinct");
}
/// A `FimTokens` built from a struct literal must return the ids it was given.
#[test]
fn test_fim_tokens_custom() {
    let tokens = FimTokens {
        prefix_id: 10,
        middle_id: 11,
        suffix_id: 12,
        pad_id: 13,
    };
    let observed = (
        tokens.prefix_id,
        tokens.middle_id,
        tokens.suffix_id,
        tokens.pad_id,
    );
    assert_eq!(observed, (10, 11, 12, 13));
}
/// `StarCoder2Config::default()` must match the 3B preset in hidden size and
/// number of hidden layers.
#[test]
fn test_config_default_is_3b() {
    let fallback = StarCoder2Config::default();
    let reference = StarCoder2Config::starcoder2_3b();
    assert_eq!(
        (fallback.hidden_size, fallback.num_hidden_layers),
        (reference.hidden_size, reference.num_hidden_layers)
    );
}
/// Every size preset must have `rope_theta` within 1.0 of 10000.
#[test]
fn test_config_rope_theta() {
    let presets = [
        StarCoder2Config::starcoder2_3b(),
        StarCoder2Config::starcoder2_7b(),
        StarCoder2Config::starcoder2_15b(),
    ];
    for preset in presets.iter() {
        // Absolute distance from the expected base frequency.
        let drift = (preset.rope_theta - 10000.0).abs();
        assert!(drift < 1.0, "rope_theta should be 10000");
    }
}
/// None of the size presets may configure a sliding window.
#[test]
fn test_config_sliding_window_none_for_released() {
    let presets = [
        StarCoder2Config::starcoder2_3b(),
        StarCoder2Config::starcoder2_7b(),
        StarCoder2Config::starcoder2_15b(),
    ];
    assert!(
        presets.iter().all(|preset| preset.sliding_window.is_none()),
        "no sliding window in released checkpoints"
    );
}
/// Every size preset must set `max_position_embeddings` to 16384.
#[test]
fn test_config_max_position_embeddings() {
    let presets = [
        StarCoder2Config::starcoder2_3b(),
        StarCoder2Config::starcoder2_7b(),
        StarCoder2Config::starcoder2_15b(),
    ];
    presets.iter().for_each(|preset| {
        assert_eq!(
            preset.max_position_embeddings, 16384,
            "all presets support 16K context"
        );
    });
}
/// The output of the RMS norm must itself have a root-mean-square of about 1.
///
/// NOTE(review): a unit RMS is only expected if a freshly built
/// `StarCoder2RmsNorm` applies a scale of one — presumably the case; confirm
/// against the layer's constructor.
#[test]
fn test_rms_norm_unit_rms() {
    let norm = StarCoder2RmsNorm::new(4, 1e-5).expect("build norm");
    let input = Tensor::from_vec(vec![1.0f32, 2.0, 3.0, 4.0], &[4]).expect("tensor");
    let output = norm.forward(input).expect("forward");
    let out_vec: Vec<f32> = match &output {
        Tensor::F32(arr) => arr.iter().copied().collect(),
        _ => panic!("expected F32"),
    };
    // Check the element count before computing the mean, so a wrong-sized
    // output fails here rather than yielding a misleading RMS value.
    assert_eq!(out_vec.len(), 4, "norm output must keep all 4 elements");
    // Divide by the actual element count instead of a hard-coded constant.
    let mean_square = out_vec.iter().map(|v| v * v).sum::<f32>() / out_vec.len() as f32;
    let rms = mean_square.sqrt();
    assert!(
        (rms - 1.0).abs() < 0.01,
        "RMS of output should be ≈ 1, got {rms}"
    );
}
/// A single-token forward pass through the small test model must produce an
/// output with batch 1, sequence length 1, and the config's hidden size.
#[test]
fn test_model_single_token_forward() {
    let tiny = StarCoder2Config::small_test();
    let backbone = StarCoder2Model::new(tiny.clone()).expect("build model");
    let dims = backbone
        .forward(vec![7u32])
        .expect("single token forward")
        .shape()
        .to_vec();
    assert_eq!(dims[0], 1, "batch");
    assert_eq!(dims[1], 1, "seq_len=1");
    assert_eq!(dims[2], tiny.hidden_size);
}