aha 0.2.5

aha is a model inference library. It currently supports Qwen (2.5VL/3/3VL/3.5/ASR/3Embedding/3Reranker), MiniCPM4, VoxCPM/1.5, DeepSeek-OCR/2, Hunyuan-OCR, PaddleOCR-VL/1.5, RMBG2.0, GLM (ASR-Nano-2512/OCR), Fun-ASR-Nano-2512, and LFM (2/2.5/2VL/2.5VL).
Documentation
/// Configuration for a DeepSeek-V2-family language model (an MoE transformer
/// with optional low-rank "MLA" attention projections), deserialized from the
/// model's `config.json`.
///
/// Field names mirror the upstream HuggingFace config keys. Fields carrying a
/// `#[serde(default = "...")]` attribute are optional in the JSON and fall
/// back to the `default_*` helpers defined below in this file.
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct DeepseekV2Config {
    // Special token ids marking sequence start / end.
    pub bos_token_id: u32,
    pub eos_token_id: u32,
    // Number of leading layers kept dense instead of MoE — presumably matches
    // the upstream `first_k_dense_replace` semantics; confirm at the use site.
    pub first_k_dense_replace: usize,
    pub hidden_size: usize,
    // MLP intermediate size for dense (non-MoE) layers.
    pub intermediate_size: usize,
    // Low-rank dimension for the key/value projection in MLA attention;
    // `None` presumably disables the low-rank path — TODO confirm.
    pub kv_lora_rank: Option<usize>,
    pub lm_head: bool,
    pub max_position_embeddings: usize,
    // Per-expert MLP intermediate size for MoE layers.
    pub moe_intermediate_size: usize,
    // Defaults to 1 when absent (see `default_moe_layer_freq`).
    #[serde(default = "default_moe_layer_freq")]
    pub moe_layer_freq: usize,
    // Scaling applied to routed-expert outputs; defaults to 1.0 when absent.
    #[serde(default = "default_routed_scaling_factor")]
    pub routed_scaling_factor: f64,
    // Router scoring function name; defaults to "softmax" when absent.
    #[serde(default = "default_scoring_func")]
    pub scoring_func: String,
    // Auxiliary load-balancing loss weight; defaults to 0.001 when absent.
    #[serde(default = "default_aux_loss_alpha")]
    pub aux_loss_alpha: f32,
    // Defaults to true when absent.
    #[serde(default = "default_true")]
    pub seq_aux: bool,
    // Whether to renormalize top-k routing weights; defaults to false.
    #[serde(default = "default_false")]
    pub norm_topk_prob: bool,
    // Expert-group routing parameters: `n_group` groups in total, of which
    // `topk_group` (below) are considered — presumably per the DeepSeek-V2
    // group-limited routing scheme; confirm against the routing code.
    pub n_group: usize,
    pub n_routed_experts: usize,
    pub n_shared_experts: usize,
    pub num_attention_heads: usize,
    // Experts activated per token by the router.
    pub num_experts_per_tok: usize,
    pub num_hidden_layers: usize,
    pub num_key_value_heads: usize,
    // Low-rank dimension for the query projection; `None` disables it.
    pub q_lora_rank: Option<usize>,
    // Query/key head dims split into non-rotary ("nope") and rotary ("rope")
    // parts — presumably summed to form the full QK head dim; confirm.
    pub qk_nope_head_dim: usize,
    pub qk_rope_head_dim: usize,
    pub rm_head: bool,
    pub topk_group: usize,
    // Routing strategy identifier (interpreted by the model code).
    pub topk_method: String,
    // Checkpoint weight dtype as a string, e.g. "bfloat16".
    pub torch_dtype: String,
    // Whether to use multi-head latent attention instead of standard MHA.
    pub use_mla: bool,
    pub v_head_dim: usize,
    pub vocab_size: usize,
    // RMSNorm epsilon; defaults to 1e-6 when absent.
    #[serde(default = "default_rms_norm_eps")]
    pub rms_norm_eps: f64,
}

// Serde `default = "..."` helpers: fallback values for `DeepseekV2Config`
// fields that older `config.json` files may omit.
fn default_moe_layer_freq() -> usize { 1 }
fn default_routed_scaling_factor() -> f64 { 1.0 }
fn default_scoring_func() -> String { String::from("softmax") }
fn default_aux_loss_alpha() -> f32 { 0.001 }
fn default_true() -> bool { true }
fn default_false() -> bool { false }
fn default_rms_norm_eps() -> f64 { 1e-6 }

/// Configuration of the vision-to-language projector, which presumably maps
/// `input_dim`-dimensional vision features into `n_embed`-dimensional
/// language-model embeddings (confirm against the projector implementation).
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct ProjectorConfig {
    pub input_dim: usize,
    pub model_type: String,
    pub n_embed: usize,
    // Projector architecture identifier, interpreted by the model code.
    pub projector_type: String,
}

/// ViT hyperparameters for the CLIP-L/14 (224 px) vision tower.
/// Deserialized from the `"clip-l-14-224"` entry of the vision config.
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct ClipL14_224 {
    // Attention heads per transformer layer.
    pub heads: usize,
    pub image_size: usize,
    pub layers: usize,
    pub patch_size: usize,
    // Hidden (embedding) width of the tower.
    pub width: usize,
}

/// ViT hyperparameters for the SAM ViT-B image encoder used by the
/// DeepSeek-OCR vision stack.
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct SamVitB {
    // Channel sizes of the downsampling stages appended after the ViT.
    pub downsample_channels: Vec<usize>,
    // Indices of layers that use global (rather than windowed) attention —
    // presumably mirroring the original SAM config; confirm at the use site.
    pub global_attn_indexes: Vec<usize>,
    pub heads: usize,
    pub layers: usize,
    pub width: usize,
}

/// Hyperparameters of the Qwen2-0.5B component referenced by the vision
/// config's `"qwen2-0-5b"` key.
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct Qwen2_0_5B {
    /// Embedding dimension. Made `pub` for consistency: every other config
    /// struct in this file exposes all of its fields publicly, and a private
    /// field in a `pub` config struct is unreadable by callers.
    pub dim: usize,
}

/// Per-tower width/geometry section of the vision config, keyed by tower
/// name in the JSON.
///
/// NOTE(review): the first two fields are renamed to kebab-case JSON keys
/// while `sam_vit_b` is not — so the JSON key for SAM must literally be
/// "sam_vit_b"; verify against an actual DeepSeek-OCR `config.json`.
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct Width {
    // Optional: absent when the checkpoint does not ship this tower.
    #[serde(rename = "clip-l-14-224")]
    pub clip_l_14_224: Option<ClipL14_224>,
    #[serde(rename = "qwen2-0-5b")]
    pub qwen2_0_5b: Option<Qwen2_0_5B>,
    // Required: SAM ViT-B is always present.
    pub sam_vit_b: SamVitB,
}

/// Vision-encoder section of the DeepSeek-OCR config.
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct DeepseekOCRVisionConfig {
    pub image_size: usize,
    // MLP expansion ratio of the vision transformer blocks.
    pub mlp_ratio: f32,
    // Per-tower geometry (CLIP / Qwen2 / SAM); see `Width`.
    pub width: Width,
}

/// Top-level DeepSeek-OCR model configuration (`config.json`), combining the
/// language backbone, projector, and vision encoder sections.
///
/// The fields from `bos_token_id` onward duplicate keys that also appear in
/// `language_config` — presumably because the checkpoint's JSON repeats them
/// at the top level; confirm which copy the model code actually reads.
///
/// NOTE(review): `first_k_dense_replace` is `u32` here but `usize` in
/// `DeepseekV2Config` — both deserialize the same JSON key; the types should
/// probably be unified (changing either `pub` field type would break
/// callers, so flagging rather than fixing here).
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
pub struct DeepseekOCRConfig {
    pub language_config: DeepseekV2Config,
    pub projector_config: ProjectorConfig,
    // Checkpoint weight dtype as a string, e.g. "bfloat16".
    pub torch_dtype: String,
    pub vision_config: DeepseekOCRVisionConfig,
    pub bos_token_id: u32,
    pub eos_token_id: u32,
    pub first_k_dense_replace: u32,
    pub hidden_size: usize,
    pub intermediate_size: usize,
    pub kv_lora_rank: Option<usize>,
    pub lm_head: bool,
    pub max_position_embeddings: usize,
    pub moe_intermediate_size: usize,
    pub n_group: usize,
    pub n_routed_experts: usize,
    pub n_shared_experts: usize,
    pub num_attention_heads: usize,
    pub num_experts_per_tok: usize,
    pub num_hidden_layers: usize,
    pub num_key_value_heads: usize,
    pub q_lora_rank: Option<usize>,
    pub qk_nope_head_dim: usize,
    pub qk_rope_head_dim: usize,
    pub rm_head: bool,
    pub topk_group: usize,
    pub topk_method: String,
    pub use_mla: bool,
    pub v_head_dim: usize,
    pub vocab_size: usize,
}