//! Jamba2: a hybrid decoder stack that interleaves Mamba (SSM) and attention
//! layers, with mixture-of-experts MLPs applied on a configurable schedule.

mod config;
mod model;
mod tasks;

pub use config::{Jamba2Config, Jamba2ConfigError, LayerType};
pub use model::{Jamba2Attention, Jamba2DecoderLayer, Jamba2Error, Jamba2Model, MambaBlock};
pub use tasks::{CausalLmOutput, Jamba2ForCausalLM, Jamba2TaskError};

#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_default_config_values() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.vocab_size, 65536);
assert_eq!(cfg.hidden_size, 4096);
assert_eq!(cfg.intermediate_size, 14336);
assert_eq!(cfg.num_hidden_layers, 32);
assert_eq!(cfg.num_attention_heads, 32);
assert_eq!(cfg.num_key_value_heads, 8);
assert_eq!(cfg.head_dim, 128);
assert_eq!(cfg.mamba_d_state, 16);
assert_eq!(cfg.mamba_d_conv, 4);
assert_eq!(cfg.mamba_expand, 2);
assert_eq!(cfg.mamba_dt_rank, 256);
assert_eq!(cfg.attn_layer_offset, 4);
assert_eq!(cfg.attn_layer_period, 8);
assert_eq!(cfg.expert_layer_offset, 1);
assert_eq!(cfg.expert_layer_period, 2);
assert_eq!(cfg.num_experts, 16);
assert_eq!(cfg.num_experts_per_tok, 2);
assert_eq!(cfg.max_position_embeddings, 262144);
assert!((cfg.rms_norm_eps - 1e-5).abs() < 1e-10);
assert!((cfg.rope_theta - 10000.0).abs() < 1e-5);
assert_eq!(cfg.hidden_act, "silu");
assert!(!cfg.tie_word_embeddings);
}
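// With the default schedule (attn_layer_offset = 4, attn_layer_period = 8),
// attention layers land at indices 4, 12, 20, and 28 of the 32-layer stack.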
#[test]
fn test_is_attention_layer() {
let cfg = Jamba2Config::default();
assert!(!cfg.is_attention_layer(0));
assert!(!cfg.is_attention_layer(1));
assert!(!cfg.is_attention_layer(3));
assert!(cfg.is_attention_layer(4));
assert!(!cfg.is_attention_layer(5));
assert!(!cfg.is_attention_layer(11));
assert!(cfg.is_attention_layer(12));
assert!(cfg.is_attention_layer(20));
assert!(cfg.is_attention_layer(28));
assert!(!cfg.is_attention_layer(29));
}
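// The default MoE schedule (expert_layer_offset = 1, expert_layer_period = 2)
// places an expert MLP on every odd layer index.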
#[test]
fn test_is_moe_layer() {
let cfg = Jamba2Config::default();
assert!(!cfg.is_moe_layer(0));
assert!(cfg.is_moe_layer(1));
assert!(!cfg.is_moe_layer(2));
assert!(cfg.is_moe_layer(3));
assert!(!cfg.is_moe_layer(4));
assert!(cfg.is_moe_layer(5));
assert!(cfg.is_moe_layer(11));
}
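// layer_type crosses the two schedules: {Mamba, Attention} x {dense, MoE}.
// The default attention indices (4, 12, 20, 28) are all even, so AttentionMoE
// never occurs in the default config.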
#[test]
fn test_layer_type() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.layer_type(0), LayerType::Mamba);
assert_eq!(cfg.layer_type(1), LayerType::MambaMoE);
assert_eq!(cfg.layer_type(2), LayerType::Mamba);
assert_eq!(cfg.layer_type(3), LayerType::MambaMoE);
assert_eq!(cfg.layer_type(4), LayerType::Attention);
assert_eq!(cfg.layer_type(5), LayerType::MambaMoE);
assert_eq!(cfg.layer_type(12), LayerType::Attention);
}
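// mamba_inner_dim is hidden_size * mamba_expand: 4096 * 2 = 8192 by default.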
#[test]
fn test_mamba_inner_dim() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.mamba_inner_dim(), 8192);
let cfg_small = Jamba2Config {
hidden_size: 512,
mamba_expand: 4,
..Jamba2Config::default()
};
assert_eq!(cfg_small.mamba_inner_dim(), 2048);
}
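// The 1.5B preset narrows the model (hidden_size 2048, 12 layers) while keeping
// the 65536-token vocabulary (see test_vocab_size_1_5b below).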
#[test]
fn test_jamba2_1_5b_preset() {
let cfg = Jamba2Config::jamba2_1_5b();
assert_eq!(cfg.hidden_size, 2048);
assert_eq!(cfg.num_hidden_layers, 12);
assert_eq!(cfg.num_attention_heads, 16);
assert_eq!(cfg.num_key_value_heads, 4);
assert_eq!(cfg.mamba_dt_rank, 128);
assert_eq!(cfg.mamba_inner_dim(), 4096);
}
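// validate must reject degenerate configs: zero-sized fields, 7 attention heads
// paired with 3 KV heads (not evenly groupable), and a zero layer period.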
#[test]
fn test_validate() {
let valid = Jamba2Config::default();
assert!(valid.validate().is_ok());
let bad_vocab = Jamba2Config {
vocab_size: 0,
..Jamba2Config::default()
};
assert!(bad_vocab.validate().is_err());
let bad_heads = Jamba2Config {
num_attention_heads: 7,
num_key_value_heads: 3,
..Jamba2Config::default()
};
assert!(bad_heads.validate().is_err());
let bad_experts = Jamba2Config {
num_experts_per_tok: 0,
..Jamba2Config::default()
};
assert!(bad_experts.validate().is_err());
let bad_period = Jamba2Config {
attn_layer_period: 0,
..Jamba2Config::default()
};
assert!(bad_period.validate().is_err());
}
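// Exhaustive classification of all 32 default layers: the 4 attention layers
// plus 28 Mamba layers must account for every index exactly once.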
#[test]
fn test_layer_combination_coverage() {
let cfg = Jamba2Config::default();
let mut mamba_count = 0usize;
let mut attn_count = 0usize;
let mut mamba_moe_count = 0usize;
let mut attn_moe_count = 0usize;
for i in 0..cfg.num_hidden_layers {
match cfg.layer_type(i) {
LayerType::Mamba => mamba_count += 1,
LayerType::Attention => attn_count += 1,
LayerType::MambaMoE => mamba_moe_count += 1,
LayerType::AttentionMoE => attn_moe_count += 1,
}
}
assert_eq!(
attn_count + attn_moe_count,
4,
"Should have 4 attention layers total"
);
assert_eq!(
mamba_count + mamba_moe_count,
28,
"Should have 28 Mamba layers total"
);
assert_eq!(
mamba_count + attn_count + mamba_moe_count + attn_moe_count,
32
);
}
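// Tiny 4-layer config for the forward/generate tests below: offsets and periods
// of 1/2 make layers 1 and 3 AttentionMoE and layers 0 and 2 plain Mamba,
// keeping model construction cheap.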
fn small_jamba2_config() -> Jamba2Config {
Jamba2Config {
vocab_size: 256,
hidden_size: 64,
intermediate_size: 128,
num_hidden_layers: 4,
num_attention_heads: 4,
num_key_value_heads: 2,
head_dim: 16,
mamba_d_state: 4,
mamba_d_conv: 2,
mamba_expand: 2,
mamba_dt_rank: 4,
attn_layer_offset: 1,
attn_layer_period: 2,
expert_layer_offset: 1,
expert_layer_period: 2,
num_experts: 4,
num_experts_per_tok: 2,
max_position_embeddings: 512,
rms_norm_eps: 1e-5,
rope_theta: 10000.0,
hidden_act: "silu".to_string(),
attention_dropout: 0.0,
tie_word_embeddings: false,
}
}
#[test]
fn test_jamba2_1_5b_validate_ok() {
let cfg = Jamba2Config::jamba2_1_5b();
assert!(cfg.validate().is_ok(), "jamba2_1_5b should pass validation");
}
#[test]
fn test_jamba2_1_5b_layer_types() {
let cfg = Jamba2Config::jamba2_1_5b();
assert_eq!(cfg.layer_type(0), LayerType::Mamba);
assert_eq!(cfg.layer_type(1), LayerType::MambaMoE);
assert_eq!(cfg.layer_type(2), LayerType::Mamba);
assert_eq!(cfg.layer_type(4), LayerType::Attention);
assert_eq!(cfg.layer_type(5), LayerType::MambaMoE);
}
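// mamba_dt_rank = 0 requests the automatic value, ceil(hidden_size / 16).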
#[test]
fn test_effective_dt_rank_auto_compute() {
let cfg = Jamba2Config {
hidden_size: 512,
mamba_dt_rank: 0,
..Jamba2Config::default()
};
assert_eq!(cfg.effective_dt_rank(), 32);
}
#[test]
fn test_effective_dt_rank_explicit() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.effective_dt_rank(), 256);
}
#[test]
fn test_jamba2_1_5b_mamba_inner_dim() {
let cfg = Jamba2Config::jamba2_1_5b();
assert_eq!(cfg.mamba_inner_dim(), 4096);
}
#[test]
fn test_attn_moe_layer_exists_in_custom_config() {
let cfg = Jamba2Config {
attn_layer_offset: 1,
attn_layer_period: 4,
expert_layer_offset: 1,
expert_layer_period: 2,
..Jamba2Config::default()
};
assert_eq!(cfg.layer_type(1), LayerType::AttentionMoE);
}
#[test]
fn test_num_experts_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.num_experts, 16);
}
#[test]
fn test_num_experts_per_tok_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.num_experts_per_tok, 2);
}
#[test]
fn test_mamba_d_state_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.mamba_d_state, 16);
}
#[test]
fn test_mamba_d_conv_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.mamba_d_conv, 4);
}
#[test]
fn test_max_position_embeddings_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.max_position_embeddings, 262144);
}
#[test]
fn test_config_clone() {
let cfg = Jamba2Config::default();
let cloned = cfg.clone();
assert_eq!(cloned.vocab_size, cfg.vocab_size);
assert_eq!(cloned.hidden_size, cfg.hidden_size);
assert_eq!(cloned.num_experts, cfg.num_experts);
assert_eq!(cloned.mamba_d_state, cfg.mamba_d_state);
assert_eq!(cloned.attn_layer_period, cfg.attn_layer_period);
assert_eq!(cloned.tie_word_embeddings, cfg.tie_word_embeddings);
}
#[test]
fn test_config_debug() {
let cfg = Jamba2Config::default();
let s = format!("{:?}", cfg);
assert!(
s.contains("Jamba2Config"),
"debug must contain type name, got: {s}"
);
assert!(
s.contains("vocab_size"),
"debug must contain vocab_size, got: {s}"
);
assert!(
s.contains("num_experts"),
"debug must contain num_experts, got: {s}"
);
}
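// Each degenerate field below must independently trip validation.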
#[test]
fn test_validate_fails_hidden_size_zero() {
let cfg = Jamba2Config {
hidden_size: 0,
..Jamba2Config::default()
};
assert!(cfg.validate().is_err(), "hidden_size=0 should fail");
}
#[test]
fn test_validate_fails_num_hidden_layers_zero() {
let cfg = Jamba2Config {
num_hidden_layers: 0,
..Jamba2Config::default()
};
assert!(cfg.validate().is_err(), "num_hidden_layers=0 should fail");
}
#[test]
fn test_validate_fails_mamba_expand_zero() {
let cfg = Jamba2Config {
mamba_expand: 0,
..Jamba2Config::default()
};
assert!(cfg.validate().is_err(), "mamba_expand=0 should fail");
}
#[test]
fn test_validate_fails_experts_per_tok_exceeds_num_experts() {
let cfg = Jamba2Config {
num_experts: 4,
num_experts_per_tok: 8,
..Jamba2Config::default()
};
assert!(
cfg.validate().is_err(),
"experts_per_tok > num_experts should fail"
);
}
#[test]
fn test_validate_fails_expert_layer_period_zero() {
let cfg = Jamba2Config {
expert_layer_period: 0,
..Jamba2Config::default()
};
assert!(cfg.validate().is_err(), "expert_layer_period=0 should fail");
}
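// The error Display impls should surface the offending values and context strings.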
#[test]
fn test_jamba2_error_display_empty_input() {
let err = Jamba2Error::EmptyInput;
let s = err.to_string();
assert!(
s.to_lowercase().contains("empty"),
"EmptyInput display should mention 'empty', got: {s}"
);
}
#[test]
fn test_jamba2_error_display_dimension_mismatch() {
let err = Jamba2Error::DimensionMismatch {
expected: 64,
got: 32,
context: "test_context".to_string(),
};
let s = err.to_string();
assert!(s.contains("64"), "should contain expected value, got: {s}");
assert!(s.contains("32"), "should contain got value, got: {s}");
assert!(
s.contains("test_context"),
"should contain context, got: {s}"
);
}
#[test]
fn test_jamba2_task_error_display_config_variant() {
let config_err = Jamba2ConfigError::InvalidField("bad field".to_string());
let task_err: Jamba2TaskError = config_err.into();
let s = task_err.to_string();
assert!(
s.contains("bad field") || s.to_lowercase().contains("config"),
"Config task error should mention config or field, got: {s}"
);
}
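// forward returns per-token logits ([seq_len][vocab_size]) alongside the final
// hidden states ([seq_len][hidden_size]); the next two tests pin both shapes.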
#[test]
fn test_causal_lm_forward_logits_shape() {
let cfg = small_jamba2_config();
let lm = Jamba2ForCausalLM::new(cfg.clone()).expect("Jamba2ForCausalLM::new");
let input = &[1u32, 2, 3];
let output = lm.forward(input).expect("forward");
assert_eq!(
output.logits.len(),
3,
"logits seq_len must match input length"
);
assert_eq!(
output.logits[0].len(),
cfg.vocab_size,
"each logit row must have vocab_size entries"
);
}
#[test]
fn test_causal_lm_forward_hidden_states_shape() {
let cfg = small_jamba2_config();
let lm = Jamba2ForCausalLM::new(cfg.clone()).expect("Jamba2ForCausalLM::new");
let output = lm.forward(&[5u32, 6]).expect("forward");
assert_eq!(
output.hidden_states.len(),
2,
"hidden_states must match seq_len"
);
assert_eq!(
output.hidden_states[0].len(),
cfg.hidden_size,
"hidden state width must equal hidden_size"
);
}
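// generate returns only the newly sampled tokens (exactly max_new_tokens of
// them) and must reject an empty prompt.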
#[test]
fn test_causal_lm_generate_token_count() {
let cfg = small_jamba2_config();
let lm = Jamba2ForCausalLM::new(cfg).expect("Jamba2ForCausalLM::new");
let generated = lm.generate(&[1u32, 2], 5).expect("generate");
assert_eq!(
generated.len(),
5,
"generate must return exactly max_new_tokens tokens"
);
}
#[test]
fn test_causal_lm_generate_empty_input_error() {
let cfg = small_jamba2_config();
let lm = Jamba2ForCausalLM::new(cfg).expect("Jamba2ForCausalLM::new");
let result = lm.generate(&[], 3);
assert!(result.is_err(), "generate with empty input must return Err");
}
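// Every non-attention layer is an SSM (Mamba) layer, with or without MoE.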
#[test]
fn test_ssm_layer_count_default() {
let cfg = Jamba2Config::default();
let ssm_count = (0..cfg.num_hidden_layers)
.filter(|&i| !cfg.is_attention_layer(i))
.count();
assert_eq!(
ssm_count, 28,
"default config must have 28 SSM (Mamba) layers"
);
}
#[test]
fn test_attention_layer_count_1_5b() {
let cfg = Jamba2Config::jamba2_1_5b();
let attn_count = (0..cfg.num_hidden_layers)
.filter(|&i| cfg.is_attention_layer(i))
.count();
assert_eq!(attn_count, 1, "jamba2_1_5b must have 1 attention layer");
}
#[test]
fn test_vocab_size_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.vocab_size, 65536);
}
#[test]
fn test_vocab_size_1_5b() {
let cfg = Jamba2Config::jamba2_1_5b();
assert_eq!(cfg.vocab_size, 65536);
}
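// Grouped-query attention: 32 query heads share 8 KV heads, a 4:1 ratio.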
#[test]
fn test_gqa_ratio_default() {
let cfg = Jamba2Config::default();
assert_eq!(cfg.num_attention_heads / cfg.num_key_value_heads, 4);
}
#[test]
fn test_mamba_dt_rank_1_5b_matches_auto() {
let cfg = Jamba2Config::jamba2_1_5b();
let auto = cfg.hidden_size.div_ceil(16);
assert_eq!(
cfg.mamba_dt_rank, auto,
"jamba2_1_5b dt_rank should equal ceil(hidden/16)"
);
}
#[test]
fn test_attention_dropout_default() {
let cfg = Jamba2Config::default();
assert!((cfg.attention_dropout - 0.0_f32).abs() < 1e-7);
}
#[test]
fn test_layer_type_partial_eq() {
assert_eq!(LayerType::Mamba, LayerType::Mamba);
assert_eq!(LayerType::Attention, LayerType::Attention);
assert_eq!(LayerType::MambaMoE, LayerType::MambaMoE);
assert_eq!(LayerType::AttentionMoE, LayerType::AttentionMoE);
assert_ne!(LayerType::Mamba, LayerType::Attention);
}
#[test]
fn test_jamba2_error_display_layer_error() {
let err = Jamba2Error::LayerError {
layer: 3,
msg: "oops".to_string(),
};
let s = err.to_string();
assert!(s.contains('3'), "should mention layer index, got: {s}");
assert!(s.contains("oops"), "should mention message, got: {s}");
}
#[test]
fn test_causal_lm_generate_tokens_within_vocab() {
let cfg = small_jamba2_config();
let vocab = cfg.vocab_size;
let lm = Jamba2ForCausalLM::new(cfg).expect("Jamba2ForCausalLM::new");
let generated = lm.generate(&[0u32], 4).expect("generate");
for &tok in &generated {
assert!(
(tok as usize) < vocab,
"generated token {tok} out of vocab range {vocab}"
);
}
}
#[test]
fn test_validate_fails_num_attention_heads_zero() {
let cfg = Jamba2Config {
num_attention_heads: 0,
..Jamba2Config::default()
};
assert!(
cfg.validate().is_err(),
"num_attention_heads=0 should fail validation"
);
}
#[test]
fn test_validate_fails_num_key_value_heads_zero() {
let cfg = Jamba2Config {
num_key_value_heads: 0,
..Jamba2Config::default()
};
assert!(
cfg.validate().is_err(),
"num_key_value_heads=0 should fail validation"
);
}
}