pub mod config;
pub mod model;
pub mod tasks;
pub use config::Qwen25Config;
pub use model::{
silu, swiglu, Qwen25Attention, Qwen25DecoderLayer, Qwen25MLP, Qwen25Model, Qwen25RmsNorm,
Qwen25RotaryEmbedding,
};
pub use tasks::{Qwen25Error, Qwen25ForCausalLM, Qwen25ForSequenceClassification};
#[cfg(test)]
mod tests {
    use super::*;
    use trustformers_core::device::Device;
    use trustformers_core::tensor::Tensor;
    use trustformers_core::traits::{Config, Layer, Model};

    /// Deliberately small configuration so model-building tests stay fast.
    /// Head geometry: 4 attention heads over 2 KV heads (GQA group of 2).
    fn tiny_config() -> Qwen25Config {
        Qwen25Config {
            vocab_size: 64,
            hidden_size: 32,
            intermediate_size: 64,
            num_hidden_layers: 2,
            num_attention_heads: 4,
            num_key_value_heads: 2,
            head_dim: 8,
            max_position_embeddings: 512,
            ..Qwen25Config::default()
        }
    }

    /// Default config must match the published Qwen2.5-7B hyperparameters.
    #[test]
    fn test_config_defaults() {
        let defaults = Qwen25Config::default();
        assert_eq!(defaults.vocab_size, 151936);
        assert_eq!(defaults.hidden_size, 3584);
        assert_eq!(defaults.intermediate_size, 18944);
        assert_eq!(defaults.num_hidden_layers, 28);
        assert_eq!(defaults.num_attention_heads, 28);
        assert_eq!(defaults.num_key_value_heads, 4);
        assert_eq!(defaults.head_dim, 128);
        assert_eq!(defaults.max_position_embeddings, 32768);
        assert!((defaults.rope_theta - 1_000_000.0).abs() < 1.0);
        assert!(!defaults.use_sliding_window);
        assert!(defaults.sliding_window.is_none());
        assert_eq!(defaults.max_window_layers, 28);
        assert!(!defaults.tie_word_embeddings);
        assert!(!defaults.use_mrope);
        assert_eq!(defaults.hidden_act, "silu");
    }

    /// Grouped-query attention: KV heads must divide (and be fewer than)
    /// the attention heads; kv_group_size is the heads-per-KV-head ratio.
    #[test]
    fn test_gqa_dimensions() {
        let default_cfg = Qwen25Config::default();
        assert!(
            default_cfg.num_key_value_heads < default_cfg.num_attention_heads,
            "GQA: kv_heads ({}) must be < attn_heads ({})",
            default_cfg.num_key_value_heads,
            default_cfg.num_attention_heads
        );
        // 28 attn heads / 4 kv heads = 7.
        assert_eq!(default_cfg.kv_group_size(), 7);
        // tiny_config: 4 attn heads / 2 kv heads = 2.
        assert_eq!(tiny_config().kv_group_size(), 2);
    }

    /// Layers below `max_window_layers` use full attention; layers at or
    /// above it use the sliding window (when the feature is enabled).
    #[test]
    fn test_sliding_window_config() {
        let full_attn = Qwen25Config::default();
        assert!(!full_attn.layer_uses_sliding_window(0));
        assert!(!full_attn.layer_uses_sliding_window(27));

        let windowed = Qwen25Config {
            use_sliding_window: true,
            sliding_window: Some(1024),
            max_window_layers: 2,
            num_hidden_layers: 4,
            ..tiny_config()
        };
        assert!(windowed.validate().is_ok());
        assert!(!windowed.layer_uses_sliding_window(0));
        assert!(!windowed.layer_uses_sliding_window(1));
        assert!(windowed.layer_uses_sliding_window(2));
        assert!(windowed.layer_uses_sliding_window(3));
    }

    /// The SwiGLU MLP should accept a hidden-size-wide input row.
    #[test]
    fn test_swiglu_forward() {
        let cfg = tiny_config();
        let layer = Qwen25MLP::new(&cfg, Device::CPU);
        let x = Tensor::from_vec(vec![1.0f32; 32], &[1, 32]).expect("tensor");
        let out = layer.forward(x);
        assert!(out.is_ok(), "SwiGLU forward failed: {:?}", out.err());
    }

    /// Model construction should honor the configured layer count and
    /// allocate a nonzero number of parameters.
    #[test]
    fn test_model_layer_count() {
        let cfg = tiny_config();
        let expected_layers = cfg.num_hidden_layers;
        let model = Qwen25Model::new(cfg).expect("model creation");
        assert!(model.num_parameters() > 0);
        assert_eq!(model.config().num_hidden_layers, expected_layers);
    }

    /// The classification head should emit one logit per label.
    #[test]
    fn test_sequence_classification_head() {
        let num_labels = 5;
        let clf = Qwen25ForSequenceClassification::new(tiny_config(), num_labels)
            .expect("classifier creation");
        assert_eq!(clf.num_labels(), num_labels);
        let logits = clf.classify(&[1u32, 2, 3]).expect("classify");
        assert_eq!(
            logits.len(),
            num_labels,
            "expected {} logits, got {}",
            num_labels,
            logits.len()
        );
    }

    /// validate() must reject head counts that don't divide evenly,
    /// sliding-window mode without a window size, and zero dimensions —
    /// and accept the config again once each problem is undone.
    #[test]
    fn test_config_validation() {
        let mut cfg = tiny_config();
        assert!(cfg.validate().is_ok());

        // 5 is not divisible by 3.
        cfg.num_attention_heads = 5;
        cfg.num_key_value_heads = 3;
        assert!(
            cfg.validate().is_err(),
            "5 attn heads / 3 kv heads should fail"
        );

        // Restore heads, then break the sliding-window invariant.
        cfg.num_attention_heads = 4;
        cfg.num_key_value_heads = 2;
        cfg.use_sliding_window = true;
        cfg.sliding_window = None;
        assert!(
            cfg.validate().is_err(),
            "sliding window enabled with None should fail"
        );

        // Restore window flag, then break a size invariant.
        cfg.use_sliding_window = false;
        cfg.hidden_size = 0;
        assert!(cfg.validate().is_err(), "hidden_size=0 should fail");

        cfg.hidden_size = 32;
        assert!(cfg.validate().is_ok(), "restored config should pass");
    }

    /// End-to-end smoke test: forward pass succeeds and generate()
    /// produces exactly the requested number of tokens.
    #[test]
    fn test_causal_lm_forward_and_generate() {
        let model = Qwen25ForCausalLM::new(tiny_config()).expect("causal lm creation");
        let x = Tensor::from_vec(vec![1.0f32, 2.0, 3.0], &[3]).expect("tensor");
        let fwd = model.forward(x);
        assert!(fwd.is_ok(), "forward failed: {:?}", fwd.err());
        let gen = model.generate(&[1u32, 2, 3], 2);
        assert!(gen.is_ok(), "generate failed: {:?}", gen.err());
        assert_eq!(gen.expect("gen").len(), 2);
    }

    /// Published Qwen2.5-0.5B hyperparameters; the 0.5B variant ties
    /// its input/output embeddings.
    #[test]
    fn test_qwen25_0_5b_preset() {
        let preset = Qwen25Config::qwen25_0_5b();
        assert_eq!(preset.hidden_size, 896);
        assert_eq!(preset.num_hidden_layers, 24);
        assert_eq!(preset.num_attention_heads, 14);
        assert_eq!(preset.num_key_value_heads, 2);
        assert_eq!(preset.kv_group_size(), 7);
        assert!(preset.tie_word_embeddings);
        assert!(preset.validate().is_ok());
    }

    /// Published Qwen2.5-7B hyperparameters; 7B keeps separate
    /// input/output embeddings.
    #[test]
    fn test_qwen25_7b_preset() {
        let preset = Qwen25Config::qwen25_7b();
        assert_eq!(preset.hidden_size, 3584);
        assert_eq!(preset.num_hidden_layers, 28);
        assert_eq!(preset.num_attention_heads, 28);
        assert_eq!(preset.num_key_value_heads, 4);
        assert_eq!(preset.head_dim, 128);
        assert_eq!(preset.intermediate_size, 18944);
        assert_eq!(preset.max_position_embeddings, 32768);
        assert!(!preset.tie_word_embeddings, "7B does not tie embeddings");
        assert!(preset.validate().is_ok());
    }

    /// head_dim is stored explicitly rather than derived, so each
    /// preset must carry its own value.
    #[test]
    fn test_head_dim_explicit_field() {
        assert_eq!(Qwen25Config::qwen25_0_5b().head_dim, 64);
        assert_eq!(Qwen25Config::default().head_dim, 128);
    }

    /// Both presets share the 32K context window.
    #[test]
    fn test_max_position_embeddings() {
        for preset in [Qwen25Config::qwen25_0_5b(), Qwen25Config::qwen25_7b()] {
            assert_eq!(preset.max_position_embeddings, 32768);
        }
    }

    /// Both presets use the Qwen2.5 RoPE base of 1e6.
    #[test]
    fn test_rope_theta_presets() {
        for preset in [Qwen25Config::qwen25_0_5b(), Qwen25Config::qwen25_7b()] {
            assert!(
                (preset.rope_theta - 1_000_000.0_f64).abs() < 1.0,
                "rope_theta must be 1_000_000, got {}",
                preset.rope_theta
            );
        }
    }

    /// mRoPE (multimodal RoPE) is off by default but settable.
    #[test]
    fn test_use_mrope_default_false() {
        let defaults = Qwen25Config::default();
        assert!(!defaults.use_mrope, "use_mrope should default to false");
        let mut multimodal = tiny_config();
        multimodal.use_mrope = true;
        assert!(multimodal.use_mrope);
    }

    #[test]
    fn test_architecture_string() {
        assert_eq!(Qwen25Config::default().architecture(), "Qwen2.5");
    }

    /// Clone must copy every field faithfully (spot-check a few).
    #[test]
    fn test_config_clone() {
        let original = Qwen25Config::qwen25_0_5b();
        let copy = original.clone();
        assert_eq!(copy.vocab_size, original.vocab_size);
        assert_eq!(copy.hidden_size, original.hidden_size);
        assert_eq!(copy.head_dim, original.head_dim);
        assert_eq!(copy.tie_word_embeddings, original.tie_word_embeddings);
    }

    /// Debug output should name the type and its key fields.
    #[test]
    fn test_config_debug() {
        let rendered = format!("{:?}", tiny_config());
        assert!(rendered.contains("Qwen25Config"));
        assert!(rendered.contains("vocab_size"));
        assert!(rendered.contains("hidden_size"));
        assert!(rendered.contains("head_dim"));
    }

    /// An empty token sequence is an error, and the message says so.
    #[test]
    fn test_forward_ids_empty_input_error() {
        let model = Qwen25ForCausalLM::new(tiny_config()).expect("causal lm creation");
        let result = model.forward_ids(&[]);
        assert!(result.is_err(), "empty input should return Err");
        let err_str = result.unwrap_err().to_string();
        assert!(
            err_str.to_lowercase().contains("empty"),
            "error message should mention 'empty', got: {err_str}"
        );
    }

    /// Degenerate single-label classifier still works.
    #[test]
    fn test_sequence_classification_single_label() {
        let clf = Qwen25ForSequenceClassification::new(tiny_config(), 1)
            .expect("single-label classifier creation");
        assert_eq!(clf.num_labels(), 1);
        let logits = clf.classify(&[0u32, 1]).expect("classify");
        assert_eq!(logits.len(), 1);
    }

    /// generate(n) returns exactly n tokens for several values of n.
    #[test]
    fn test_generate_exact_length() {
        let model = Qwen25ForCausalLM::new(tiny_config()).expect("causal lm creation");
        for n in [1, 3, 5] {
            let gen = model.generate(&[1u32, 2], n).expect("generate");
            assert_eq!(gen.len(), n, "generate should produce exactly {n} tokens");
        }
    }

    /// The first layer index at or past max_window_layers switches to
    /// the sliding window — check the exact boundary.
    #[test]
    fn test_sliding_window_layer_boundary() {
        let mut cfg = tiny_config();
        cfg.use_sliding_window = true;
        cfg.sliding_window = Some(512);
        cfg.max_window_layers = 1;
        cfg.num_hidden_layers = 3;
        assert!(!cfg.layer_uses_sliding_window(0));
        assert!(cfg.layer_uses_sliding_window(1));
        assert!(cfg.layer_uses_sliding_window(2));
    }

    /// InvalidConfig display carries both a recognizable prefix and
    /// the wrapped message.
    #[test]
    fn test_qwen25_error_display_invalid_config() {
        let err = Qwen25Error::InvalidConfig("test message".to_string());
        let s = err.to_string();
        assert!(
            s.contains("invalid config") || s.contains("Qwen25"),
            "got: {s}"
        );
        assert!(s.contains("test message"), "got: {s}");
    }

    #[test]
    fn test_qwen25_error_display_shape_mismatch() {
        let err = Qwen25Error::ShapeMismatch {
            expected: vec![2, 4],
            got: vec![3, 4],
        };
        let s = err.to_string();
        assert!(s.contains("mismatch") || s.contains("shape"), "got: {s}");
    }

    #[test]
    fn test_qwen25_error_display_empty_input() {
        let s = Qwen25Error::EmptyInput.to_string();
        assert!(s.to_lowercase().contains("empty"), "got: {s}");
    }

    /// Weight-init stddev defaults to the conventional 0.02.
    #[test]
    fn test_initializer_range_default() {
        let defaults = Qwen25Config::default();
        assert!(
            (defaults.initializer_range - 0.02_f32).abs() < 1e-6,
            "initializer_range default must be 0.02, got {}",
            defaults.initializer_range
        );
    }

    #[test]
    fn test_hidden_act_is_silu() {
        assert_eq!(Qwen25Config::default().hidden_act, "silu");
    }

    // silu(x) = x * sigmoid(x); silu(1) = 1/(1+e^-1) ≈ 0.7311.
    #[test]
    fn test_silu_positive_input() {
        let v = silu(1.0_f32);
        assert!((v - 0.731_f32).abs() < 0.01, "silu(1) ≈ 0.731, got {v}");
    }

    // silu(0) = 0 * sigmoid(0) = 0 exactly.
    #[test]
    fn test_silu_zero_input() {
        let v = silu(0.0_f32);
        assert!(v.abs() < 1e-6, "silu(0) must be 0, got {v}");
    }

    /// swiglu maps (gate, up) element-wise, so length is preserved.
    #[test]
    fn test_swiglu_length_preserved() {
        let gate = vec![1.0_f32, -1.0, 2.0, 0.0];
        let up = vec![1.0_f32; 4];
        let out = swiglu(&gate, &up);
        assert_eq!(out.len(), 4, "swiglu output length must match inputs");
    }
}