/// Model format tag, as stored in the `format` field of `DetectedConfig`.
///
/// `ModelFormat::default()` yields [`ModelFormat::Oxidizr`].
// NOTE(review): the variant names suggest "native" vs. HuggingFace-style
// checkpoints; confirm against the code that performs the detection.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ModelFormat {
    /// The default format.
    #[default]
    Oxidizr,
    /// The non-default format (presumably a HuggingFace-style layout; confirm).
    HuggingFace,
}
/// Kind of a single model layer; `DetectedConfig::layer_types` holds a list of these.
///
/// The enum has no default variant.
// NOTE(review): the per-variant descriptions below are inferred from the
// variant names only; confirm against the layer implementations.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LayerType {
    /// Presumably a Mamba-2 style layer.
    Mamba2,
    /// Presumably a Mamba-3 style layer.
    Mamba3,
    /// Presumably an MLA attention layer paired with a mixture-of-experts block.
    MlaWithMoe,
    /// Presumably an MLA attention layer paired with a dense MLP block.
    MlaWithMlp,
    /// Presumably a conventional transformer layer.
    StandardTransformer,
}
#[derive(Debug, Clone)]
pub struct DetectedConfig {
pub num_layers: usize,
pub layer_types: Vec<LayerType>,
pub hidden_size: usize,
pub vocab_size: usize,
pub format: ModelFormat,
pub tie_word_embeddings: bool,
pub mamba2_num_heads: Option<usize>,
pub mamba2_head_dim: Option<usize>,
pub mamba2_state_size: Option<usize>,
pub mamba2_conv_kernel: Option<usize>,
pub mamba2_expand: Option<usize>,
pub mamba3_enabled: Option<bool>,
pub mamba3_complex_rope: Option<bool>,
pub mamba3_mimo_rank: Option<usize>,
pub mamba3_use_conv: Option<bool>,
pub num_attention_heads: Option<usize>,
pub kv_latent_dim: Option<usize>,
pub q_latent_dim: Option<usize>,
pub d_rope: Option<usize>,
pub num_experts: Option<usize>,
pub intermediate_size: Option<usize>,
pub shared_expert_enabled: bool,
pub num_kv_heads: Option<usize>,
pub head_dim: Option<usize>,
}
impl Default for DetectedConfig {
fn default() -> Self {
Self {
num_layers: 0,
layer_types: Vec::new(),
hidden_size: 0,
vocab_size: 0,
format: ModelFormat::Oxidizr,
tie_word_embeddings: false,
mamba2_num_heads: None,
mamba2_head_dim: None,
mamba2_state_size: None,
mamba2_conv_kernel: None,
mamba2_expand: None,
mamba3_enabled: None,
mamba3_complex_rope: None,
mamba3_mimo_rank: None,
mamba3_use_conv: None,
num_attention_heads: None,
kv_latent_dim: None,
q_latent_dim: None,
d_rope: None,
num_experts: None,
intermediate_size: None,
shared_expert_enabled: false,
num_kv_heads: None,
head_dim: None,
}
}
}