/// Hyperparameters describing a Mamba-2 style model.
///
/// The struct is plain data: nothing is checked when it is constructed. Call
/// [`Mamba2Config::validate`] to confirm that the head layout is internally
/// consistent before relying on `headdim`.
#[derive(Debug, Clone)]
pub struct Mamba2Config {
    /// Vocabulary size.
    pub vocab_size: usize,
    /// Model width; multiplied by `expand` to obtain [`Mamba2Config::inner_dim`].
    pub d_model: usize,
    /// Number of layers.
    pub n_layer: usize,
    /// State dimension (presumably the SSM state size — confirm against the model code).
    pub d_state: usize,
    /// Convolution size (presumably the depthwise conv kernel width — confirm against the model code).
    pub d_conv: usize,
    /// Expansion factor applied to `d_model` to obtain [`Mamba2Config::inner_dim`].
    pub expand: usize,
    /// Number of heads the inner dimension is split across.
    pub nheads: usize,
    /// Per-head width; expected to equal `inner_dim() / nheads` (see [`Mamba2Config::validate`]).
    pub headdim: usize,
    /// Chunk size (presumably the sequence chunk length used by the scan — confirm against the model code).
    pub chunk_size: usize,
    /// Epsilon for RMS normalisation.
    pub rms_norm_eps: f64,
    /// Whether embeddings are tied (presumably input embedding and output head share weights — confirm).
    pub tie_embeddings: bool,
}

impl Mamba2Config {
    /// Returns the `mamba2_2_7b` preset.
    ///
    /// `headdim` is derived from `d_model * expand / nheads` (2560 * 2 / 80 = 64)
    /// rather than hard-coded, so the preset satisfies [`Mamba2Config::validate`].
    pub fn mamba2_2_7b() -> Self {
        let d_model = 2560usize;
        let expand = 2usize;
        let nheads = 80usize;
        let headdim = d_model * expand / nheads;
        Self {
            vocab_size: 50280,
            d_model,
            n_layer: 64,
            d_state: 128,
            d_conv: 4,
            expand,
            nheads,
            headdim,
            chunk_size: 256,
            rms_norm_eps: 1e-5,
            tie_embeddings: true,
        }
    }

    /// Returns a tiny preset intended for tests.
    ///
    /// `headdim` is derived from `d_model * expand / nheads` (64 * 2 / 4 = 32),
    /// so the preset satisfies [`Mamba2Config::validate`].
    pub fn small_test() -> Self {
        let d_model = 64usize;
        let expand = 2usize;
        let nheads = 4usize;
        let headdim = d_model * expand / nheads;
        Self {
            vocab_size: 256,
            d_model,
            n_layer: 2,
            d_state: 16,
            d_conv: 4,
            expand,
            nheads,
            headdim,
            chunk_size: 64,
            rms_norm_eps: 1e-5,
            tie_embeddings: false,
        }
    }

    /// Returns the inner (expanded) dimension, `d_model * expand`.
    ///
    /// # Panics
    ///
    /// Uses plain multiplication, so with overflow checks enabled (the default
    /// for debug builds) it panics if the product does not fit in `usize`.
    pub fn inner_dim(&self) -> usize {
        self.d_model * self.expand
    }

    /// Checks that the head layout is consistent.
    ///
    /// Returns `true` only when `d_model * expand` is an exact multiple of
    /// `nheads` and the quotient equals `headdim`.
    ///
    /// This method never panics: `nheads == 0` and an overflowing
    /// `d_model * expand` both yield `false`.
    pub fn validate(&self) -> bool {
        self.d_model
            .checked_mul(self.expand)
            .is_some_and(|inner| {
                // `checked_rem` / `checked_div` return `None` for a zero divisor,
                // which turns a would-be divide-by-zero panic into `false`.
                inner.checked_rem(self.nheads) == Some(0)
                    && inner.checked_div(self.nheads) == Some(self.headdim)
            })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand for the large preset exercised by most tests.
    fn large() -> Mamba2Config {
        Mamba2Config::mamba2_2_7b()
    }

    // Shorthand for the tiny preset.
    fn small() -> Mamba2Config {
        Mamba2Config::small_test()
    }

    // ---- mamba2_2_7b preset: individual field values ----

    #[test]
    fn test_mamba2_2_7b_vocab_size() {
        assert_eq!(large().vocab_size, 50280);
    }

    #[test]
    fn test_mamba2_2_7b_d_model() {
        assert_eq!(large().d_model, 2560);
    }

    #[test]
    fn test_mamba2_2_7b_n_layer() {
        assert_eq!(large().n_layer, 64);
    }

    #[test]
    fn test_mamba2_2_7b_nheads() {
        assert_eq!(large().nheads, 80);
    }

    #[test]
    fn test_mamba2_2_7b_headdim() {
        assert_eq!(large().headdim, 64);
    }

    #[test]
    fn test_mamba2_2_7b_d_state() {
        assert_eq!(large().d_state, 128);
    }

    #[test]
    fn test_mamba2_2_7b_d_conv() {
        assert_eq!(large().d_conv, 4);
    }

    #[test]
    fn test_mamba2_2_7b_chunk_size() {
        assert_eq!(large().chunk_size, 256);
    }

    #[test]
    fn test_mamba2_2_7b_tie_embeddings() {
        assert!(large().tie_embeddings);
    }

    // ---- mamba2_2_7b preset: derived values ----

    #[test]
    fn test_mamba2_validate_passes_2_7b() {
        assert!(large().validate());
    }

    #[test]
    fn test_mamba2_inner_dim_2_7b() {
        let expected = 2560 * 2;
        assert_eq!(large().inner_dim(), expected);
    }

    // ---- small_test preset ----

    #[test]
    fn test_mamba2_small_test_config() {
        let Mamba2Config {
            vocab_size,
            d_model,
            n_layer,
            nheads,
            ..
        } = small();
        assert_eq!(vocab_size, 256);
        assert_eq!(d_model, 64);
        assert_eq!(n_layer, 2);
        assert_eq!(nheads, 4);
    }

    #[test]
    fn test_mamba2_small_test_validate() {
        assert!(small().validate());
    }

    #[test]
    fn test_mamba2_small_test_headdim() {
        // d_model * expand / nheads for the small preset.
        let expected = 64 * 2 / 4;
        assert_eq!(small().headdim, expected);
    }

    #[test]
    fn test_mamba2_small_test_tie_embeddings_false() {
        let tied = small().tie_embeddings;
        assert!(!tied);
    }

    // ---- validation failure ----

    #[test]
    fn test_mamba2_validate_fails_inconsistent_headdim() {
        // Start from a valid preset and break only the head width.
        let mut cfg = large();
        cfg.headdim = 99;
        assert!(!cfg.validate());
    }

    #[test]
    fn test_mamba2_inner_dim_small() {
        let expected = 64 * 2;
        assert_eq!(small().inner_dim(), expected);
    }

    #[test]
    fn test_mamba2_expand_factor() {
        assert_eq!(large().expand, 2);
    }

    #[test]
    fn test_mamba2_rms_norm_eps() {
        let eps = large().rms_norm_eps;
        assert!(eps > 0.0);
        assert!(eps < 1e-3);
    }

    #[test]
    fn test_mamba2_lcg_values_in_range() {
        // One step of a 64-bit linear congruential generator, mapped to [0, 1).
        // This does not touch `Mamba2Config`; it only checks the mapping's range.
        const MULTIPLIER: u64 = 6_364_136_223_846_793_005;
        const INCREMENT: u64 = 1_442_695_040_888_963_407;
        let state = 42u64.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        let sample = (state % 1000) as f32 / 1000.0;
        assert!((0.0..1.0).contains(&sample));
    }

    #[test]
    fn test_mamba2_chunk_size_small() {
        assert_eq!(small().chunk_size, 64);
    }
}