pub mod config;
pub mod model;
pub mod tasks;
pub use config::{NemotronConfig, NormType};
pub use model::{
squared_relu, NemotronAttention, NemotronDecoderLayer, NemotronLayerNorm, NemotronMLP,
NemotronModel, NemotronNorm, NemotronPartialRotaryEmbedding, NemotronRmsNorm,
};
pub use tasks::{NemotronError, NemotronForCausalLM};
#[cfg(test)]
mod tests {
use super::*;
use trustformers_core::traits::Config as CoreConfig;
/// Pins the field values produced by `NemotronConfig::default()`.
#[test]
fn test_nemotron_config_defaults() {
    // Bind only the fields under test; everything else is skipped via `..`.
    let NemotronConfig {
        vocab_size,
        hidden_size,
        intermediate_size,
        num_hidden_layers,
        num_attention_heads,
        num_key_value_heads,
        head_dim,
        max_position_embeddings,
        rms_norm_eps,
        tie_word_embeddings,
        attention_bias,
        mlp_bias,
        ..
    } = NemotronConfig::default();

    // Integer-valued dimensions.
    assert_eq!(vocab_size, 256000);
    assert_eq!(hidden_size, 6144);
    assert_eq!(intermediate_size, 24576);
    assert_eq!(num_hidden_layers, 32);
    assert_eq!(num_attention_heads, 48);
    assert_eq!(num_key_value_heads, 8);
    assert_eq!(head_dim, 128);
    assert_eq!(max_position_embeddings, 4096);

    // Epsilon is a float, so compare with a tolerance rather than `==`.
    assert!((rms_norm_eps - 1e-5).abs() < 1e-10);

    // Boolean switches are all off by default.
    assert!(!tie_word_embeddings);
    assert!(!attention_bias);
    assert!(!mlp_bias);
}
/// The default config must carry a `partial_rotary_factor` of 0.5 (within 1e-6).
#[test]
fn test_nemotron_partial_rotary_factor() {
    let config = NemotronConfig::default();
    let delta = (config.partial_rotary_factor - 0.5).abs();
    assert!(
        delta < 1e-6,
        "expected 0.5, got {}",
        config.partial_rotary_factor
    );
}
/// Checks that `rotary_dim()` on the default config equals half of `head_dim`.
#[test]
fn test_nemotron_rotary_dim() {
    let config = NemotronConfig::default();
    let expected = config.head_dim / 2;
    let actual = config.rotary_dim();
    assert_eq!(
        actual, expected,
        "rotary_dim should be head_dim/2 = {}",
        expected
    );
}
/// Spot-checks `squared_relu` at zero, at two positive inputs and at a negative input.
#[test]
fn test_nemotron_squared_relu() {
    let at_zero = squared_relu(0.0);
    assert!((at_zero - 0.0).abs() < 1e-7);

    let at_two = squared_relu(2.0);
    assert!((at_two - 4.0).abs() < 1e-6);

    // Negative inputs are expected to map to zero.
    let at_minus_one = squared_relu(-1.0);
    assert!((at_minus_one - 0.0).abs() < 1e-7);

    let at_three = squared_relu(3.0);
    assert!((at_three - 9.0).abs() < 1e-5);
}
/// Builds a `NemotronNorm` for each `NormType` variant and checks that a
/// forward pass over a 4-element tensor succeeds for both.
#[test]
fn test_nemotron_norm_type_dispatch() {
    use trustformers_core::tensor::Tensor;
    use trustformers_core::traits::Layer;

    let rms_norm = NemotronNorm::new(4, 1e-5, &NormType::RmsNorm).unwrap();
    let layer_norm = NemotronNorm::new(4, 1e-5, &NormType::LayerNorm).unwrap();
    let sample = Tensor::from_vec(vec![1.0f32, 2.0, 3.0, 4.0], &[4]).unwrap();

    // The first call gets a clone so the original tensor can be moved into the second.
    let rms_out = rms_norm.forward(sample.clone());
    assert!(rms_out.is_ok());

    let ln_out = layer_norm.forward(sample);
    assert!(ln_out.is_ok());
}
/// Pins the headline dimensions of the `NemotronConfig::nemotron_4_340b()` preset.
#[test]
fn test_nemotron_340b_config() {
    let preset = NemotronConfig::nemotron_4_340b();

    // Width and depth.
    assert_eq!(preset.hidden_size, 18432);
    assert_eq!(preset.num_hidden_layers, 96);

    // Attention head layout.
    assert_eq!(preset.num_attention_heads, 96);
    assert_eq!(preset.num_key_value_heads, 8);

    // MLP width and vocabulary.
    assert_eq!(preset.intermediate_size, 73728);
    assert_eq!(preset.vocab_size, 256000);
}
/// Pins the headline dimensions of the `NemotronConfig::nemotron_4_22b()` preset.
#[test]
fn test_nemotron_22b_config() {
    let preset = NemotronConfig::nemotron_4_22b();

    // Width and depth.
    assert_eq!(preset.hidden_size, 6144);
    assert_eq!(preset.num_hidden_layers, 40);

    // Attention head layout.
    assert_eq!(preset.num_attention_heads, 48);
    assert_eq!(preset.num_key_value_heads, 8);

    // MLP width.
    assert_eq!(preset.intermediate_size, 24576);
}
/// The default config must validate, both via the explicit
/// `CoreConfig::validate` path and via method-call syntax.
#[test]
fn test_nemotron_validate_ok() {
    let config = NemotronConfig::default();

    let via_trait_path = CoreConfig::validate(&config);
    assert!(via_trait_path.is_ok());

    let via_method = config.validate();
    assert!(via_method.is_ok());
}
/// A default config with `num_key_value_heads` overridden to 7 must be rejected.
#[test]
fn test_nemotron_validate_bad_kv_heads() {
    let invalid = NemotronConfig {
        num_key_value_heads: 7,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err());
}
/// The default vocabulary size is 256000.
#[test]
fn test_nemotron_vocab_size() {
    let vocab = NemotronConfig::default().vocab_size;
    assert_eq!(vocab, 256000);
}
/// Returns a deliberately tiny configuration (2 layers, hidden size 64) used
/// by the tests in this module that construct a model.
fn small_nemotron_config() -> NemotronConfig {
    NemotronConfig {
        // Embedding / sequence limits.
        vocab_size: 256,
        max_position_embeddings: 512,
        tie_word_embeddings: false,

        // Transformer stack dimensions.
        hidden_size: 64,
        intermediate_size: 128,
        num_hidden_layers: 2,

        // Attention layout: 4 query heads sharing 2 key/value heads.
        num_attention_heads: 4,
        num_key_value_heads: 2,
        head_dim: 16,
        attention_bias: false,

        // Rotary embedding settings.
        rope_theta: 10000.0,
        partial_rotary_factor: 0.5,

        // Normalisation.
        norm_type: NormType::RmsNorm,
        rms_norm_eps: 1e-5,

        // MLP.
        hidden_act: "relu2".to_string(),
        mlp_bias: false,
    }
}
/// The 340B preset must carry a `partial_rotary_factor` of 0.5 (within 1e-6).
#[test]
fn test_nemotron_340b_partial_rotary_factor() {
    let preset = NemotronConfig::nemotron_4_340b();
    let delta = (preset.partial_rotary_factor - 0.5).abs();
    assert!(
        delta < 1e-6,
        "340B partial_rotary_factor must be 0.5, got {}",
        preset.partial_rotary_factor
    );
}
/// `rotary_dim()` on the 340B preset must be 96.
#[test]
fn test_nemotron_340b_rotary_dim() {
    let dim = NemotronConfig::nemotron_4_340b().rotary_dim();
    assert_eq!(dim, 96, "340B rotary_dim must be 96");
}
/// The 22B preset must carry a `partial_rotary_factor` of 0.5 (within 1e-6).
#[test]
fn test_nemotron_22b_partial_rotary_factor() {
    let preset = NemotronConfig::nemotron_4_22b();
    let delta = (preset.partial_rotary_factor - 0.5).abs();
    assert!(
        delta < 1e-6,
        "22B partial_rotary_factor must be 0.5, got {}",
        preset.partial_rotary_factor
    );
}
/// `rotary_dim()` on the 22B preset must be 64.
#[test]
fn test_nemotron_22b_rotary_dim() {
    let dim = NemotronConfig::nemotron_4_22b().rotary_dim();
    assert_eq!(dim, 64, "22B rotary_dim must be 64");
}
/// The 340B preset uses a per-head dimension of 192.
#[test]
fn test_nemotron_340b_head_dim() {
    let head_dim = NemotronConfig::nemotron_4_340b().head_dim;
    assert_eq!(head_dim, 192);
}
/// The 22B preset uses a per-head dimension of 128.
#[test]
fn test_nemotron_22b_head_dim() {
    let head_dim = NemotronConfig::nemotron_4_22b().head_dim;
    assert_eq!(head_dim, 128);
}
/// In the 340B preset, attention heads outnumber key/value heads 12 to 1.
#[test]
fn test_nemotron_340b_gqa_ratio() {
    let preset = NemotronConfig::nemotron_4_340b();
    let heads_per_kv = preset.num_attention_heads / preset.num_key_value_heads;
    assert_eq!(heads_per_kv, 12);
}
/// In the 22B preset, attention heads outnumber key/value heads 6 to 1.
#[test]
fn test_nemotron_22b_gqa_ratio() {
    let preset = NemotronConfig::nemotron_4_22b();
    let heads_per_kv = preset.num_attention_heads / preset.num_key_value_heads;
    assert_eq!(heads_per_kv, 6);
}
/// `squared_relu(1.0)` must be 1.0 (within 1e-7).
#[test]
fn test_squared_relu_one() {
    let out = squared_relu(1.0);
    assert!((out - 1.0).abs() < 1e-7);
}
/// `squared_relu(-5.0)` must be 0.0 (within 1e-7).
#[test]
fn test_squared_relu_large_negative() {
    let out = squared_relu(-5.0);
    assert!((out - 0.0).abs() < 1e-7);
}
/// `squared_relu(0.5)` must be 0.25 (within 1e-6).
#[test]
fn test_squared_relu_half() {
    let out = squared_relu(0.5);
    assert!((out - 0.25).abs() < 1e-6);
}
/// The default config and both named presets must use the `"relu2"` activation.
#[test]
fn test_nemotron_hidden_act_relu2_all_presets() {
    let presets = [
        NemotronConfig::default(),
        NemotronConfig::nemotron_4_22b(),
        NemotronConfig::nemotron_4_340b(),
    ];
    for preset in &presets {
        assert_eq!(preset.hidden_act, "relu2", "hidden_act must be relu2");
    }
}
/// The default config and both named presets must leave `attention_bias` off.
#[test]
fn test_nemotron_attention_bias_false_all_presets() {
    let presets = [
        NemotronConfig::default(),
        NemotronConfig::nemotron_4_22b(),
        NemotronConfig::nemotron_4_340b(),
    ];
    presets
        .iter()
        .for_each(|preset| assert!(!preset.attention_bias, "attention_bias must be false"));
}
/// The default config and both named presets must leave `mlp_bias` off.
#[test]
fn test_nemotron_mlp_bias_false_all_presets() {
    let presets = [
        NemotronConfig::default(),
        NemotronConfig::nemotron_4_22b(),
        NemotronConfig::nemotron_4_340b(),
    ];
    for preset in presets.iter() {
        assert!(!preset.mlp_bias, "mlp_bias must be false");
    }
}
/// The rendered text of `NemotronError::EmptyInput` must mention "empty"
/// (case-insensitively).
#[test]
fn test_nemotron_error_display_empty_input() {
    use crate::nemotron::tasks::NemotronError;

    let s = NemotronError::EmptyInput.to_string();
    let lowered = s.to_lowercase();
    assert!(
        lowered.contains("empty"),
        "EmptyInput display should mention empty, got: {s}"
    );
}
/// The rendered text of `NemotronError::InvalidConfig` must include the
/// wrapped reason string.
#[test]
fn test_nemotron_error_display_invalid_config() {
    use crate::nemotron::tasks::NemotronError;

    let s = NemotronError::InvalidConfig("test reason".to_string()).to_string();
    let echoes_reason = s.contains("test reason");
    assert!(
        echoes_reason,
        "InvalidConfig should include message, got: {s}"
    );
}
/// The rendered text of `NemotronError::SequenceTooLong` must show both the
/// limit and the offending length.
#[test]
fn test_nemotron_error_display_sequence_too_long() {
    use crate::nemotron::tasks::NemotronError;

    let s = NemotronError::SequenceTooLong {
        max: 4096,
        got: 8192,
    }
    .to_string();

    let shows_max = s.contains("4096");
    assert!(shows_max, "should contain max, got: {s}");

    let shows_got = s.contains("8192");
    assert!(shows_got, "should contain got, got: {s}");
}
/// The rendered text of `NemotronError::ShapeMismatch` must show a dimension
/// from both the expected and the received shape.
#[test]
fn test_nemotron_error_display_shape_mismatch() {
    use crate::nemotron::tasks::NemotronError;

    let s = NemotronError::ShapeMismatch {
        expected: vec![1, 64],
        got: vec![1, 32],
    }
    .to_string();

    let shows_expected = s.contains("64");
    assert!(shows_expected, "should contain expected dim, got: {s}");

    let shows_got = s.contains("32");
    assert!(shows_got, "should contain got dim, got: {s}");
}
/// A `partial_rotary_factor` of 1.5 must be rejected by `validate()`.
#[test]
fn test_nemotron_validate_partial_rotary_gt1() {
    let invalid = NemotronConfig {
        partial_rotary_factor: 1.5,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "partial_rotary_factor > 1.0 should fail");
}
/// A `partial_rotary_factor` of -0.1 must be rejected by `validate()`.
#[test]
fn test_nemotron_validate_partial_rotary_negative() {
    let invalid = NemotronConfig {
        partial_rotary_factor: -0.1,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "partial_rotary_factor < 0.0 should fail");
}
/// An `rms_norm_eps` of exactly zero must be rejected by `validate()`.
#[test]
fn test_nemotron_validate_rms_norm_eps_zero() {
    let invalid = NemotronConfig {
        rms_norm_eps: 0.0,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "rms_norm_eps=0 should fail validation");
}
/// A `rope_theta` of -1.0 must be rejected by `validate()`.
///
/// Only a negative value is exercised here; zero is not covered by this test.
#[test]
fn test_nemotron_validate_rope_theta_nonpositive() {
    let invalid = NemotronConfig {
        rope_theta: -1.0,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "rope_theta<0 should fail validation");
}
/// Cloning a config must carry over `partial_rotary_factor`, `norm_type`
/// and `vocab_size` unchanged.
#[test]
fn test_nemotron_config_clone() {
    let original = NemotronConfig::default();
    let duplicate = original.clone();

    // Float field: compare with a tolerance.
    let drift = (duplicate.partial_rotary_factor - original.partial_rotary_factor).abs();
    assert!(drift < 1e-7);

    assert_eq!(duplicate.norm_type, original.norm_type);
    assert_eq!(duplicate.vocab_size, original.vocab_size);
}
/// The `Debug` rendering of a config must include the type name and at
/// least the `partial_rotary_factor` field name.
#[test]
fn test_nemotron_config_debug() {
    let config = NemotronConfig::default();
    let s = format!("{:?}", config);

    let names_type = s.contains("NemotronConfig");
    assert!(names_type, "debug should contain type name, got: {s}");

    let names_field = s.contains("partial_rotary_factor");
    assert!(names_field, "debug should mention field, got: {s}");
}
/// `NormType::default()` must be the `RmsNorm` variant.
#[test]
fn test_norm_type_default_is_rmsnorm() {
    assert_eq!(
        NormType::default(),
        NormType::RmsNorm,
        "default NormType must be RmsNorm"
    );
}
/// Constructing `NemotronForCausalLM` from the small test config must succeed.
#[test]
fn test_nemotron_causal_lm_new_small() {
    use crate::nemotron::tasks::NemotronForCausalLM;

    let built = NemotronForCausalLM::new(small_nemotron_config());
    assert!(built.is_ok(), "new() with valid small config must succeed");
}
/// Runs greedy generation on the small test model with a 3-token prompt and
/// a budget of 4 new tokens.
///
/// On success exactly 4 tokens must come back. An error is tolerated only
/// when its message contains "forward", "Linear" or "dimension"; any other
/// error fails the test.
#[test]
fn test_nemotron_generate_greedy_token_count() {
    use crate::nemotron::tasks::NemotronForCausalLM;

    let model = NemotronForCausalLM::new(small_nemotron_config()).expect("new");
    let prompt = [1u32, 2, 3];

    match model.generate_greedy(&prompt, 4) {
        Err(e) => {
            let msg = e.to_string();
            let tolerated = ["forward", "Linear", "dimension"]
                .iter()
                .any(|&needle| msg.contains(needle));
            assert!(tolerated, "unexpected error variant: {msg}");
        }
        Ok(generated) => {
            assert_eq!(
                generated.len(),
                4,
                "generate_greedy must return max_new_tokens tokens"
            );
        }
    }
}
/// Greedy generation with an empty prompt must return an error whose
/// message mentions "empty" (case-insensitively).
#[test]
fn test_nemotron_generate_greedy_empty_input_error() {
    use crate::nemotron::tasks::NemotronForCausalLM;

    let model = NemotronForCausalLM::new(small_nemotron_config()).expect("new");
    let outcome = model.generate_greedy(&[], 3);

    // Success is a test failure here; otherwise keep the rendered error.
    let err_str = match outcome {
        Err(e) => e.to_string(),
        Ok(_) => panic!("empty input must return Err"),
    };

    let lowered = err_str.to_lowercase();
    assert!(
        lowered.contains("empty"),
        "error must mention 'empty', got: {err_str}"
    );
}
/// `rotary_dim()` on the default config must be 64.
#[test]
fn test_nemotron_default_rotary_dim() {
    let dim = NemotronConfig::default().rotary_dim();
    assert_eq!(dim, 64, "default rotary_dim must be 64");
}
/// A `vocab_size` of zero must be rejected by `validate()`.
#[test]
fn test_nemotron_validate_vocab_size_zero() {
    let invalid = NemotronConfig {
        vocab_size: 0,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "vocab_size=0 should fail validation");
}
/// A `hidden_size` of zero must be rejected by `validate()`.
#[test]
fn test_nemotron_validate_hidden_size_zero() {
    let invalid = NemotronConfig {
        hidden_size: 0,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "hidden_size=0 should fail validation");
}
/// A `head_dim` of zero must be rejected by `validate()`.
#[test]
fn test_nemotron_validate_head_dim_zero() {
    let invalid = NemotronConfig {
        head_dim: 0,
        ..NemotronConfig::default()
    };
    let verdict = invalid.validate();
    assert!(verdict.is_err(), "head_dim=0 should fail validation");
}
/// `NormType` equality: each variant equals itself and the two variants differ.
#[test]
fn test_norm_type_partial_eq() {
    let rms = NormType::RmsNorm;
    let layer = NormType::LayerNorm;

    assert_eq!(rms, NormType::RmsNorm);
    assert_eq!(layer, NormType::LayerNorm);
    assert_ne!(rms, layer);
}
}