aha 0.2.6

aha is a model inference library that now supports Qwen (2.5VL/3/3VL/3.5/ASR/3Embedding/3Reranker), MiniCPM (4/5), VoxCPM (0.5B/1.5/2), DeepSeek-OCR/2, Hunyuan-OCR, PaddleOCR-VL/1.5, RMBG2.0, GLM (ASR-Nano-2512/OCR), Fun-ASR-Nano-2512, and LFM (2/2.5/2VL/2.5VL).
Documentation
use serde::Deserialize;

/// Deserializable configuration record (`GPT2Config`).
///
/// The struct carries no `#[serde(...)]` attributes, so each field is read from
/// an input key spelled exactly like the field name. Fields typed `Option<_>`
/// become `None` when the key is absent or null; every other field must be
/// present with a value of the declared type, otherwise deserialization fails.
///
/// Fields are declared in alphabetical order.
///
/// NOTE(review): the field names look like a Hugging Face `config.json` dump
/// (model hyper-parameters mixed with generation defaults) extended with
/// `rope_base` and `position_embedding_type` — confirm against the checkpoint
/// this struct is loaded from.
#[derive(Clone, Debug, Deserialize)]
pub struct GPT2Config {
    /// Activation name kept as a free-form string; nothing is validated here.
    pub activation_function: String,
    pub add_cross_attention: bool,
    /// NOTE(review): presumably a dropout probability (as are `embd_pdrop` and
    /// `resid_pdrop`) — confirm whether the consumer uses it at all.
    pub attn_pdrop: f64,
    pub bad_words_ids: Option<Vec<u32>>,
    pub begin_suppress_tokens: Option<Vec<u32>>,
    /// Required token id; unlike `sep_token_id` it cannot be absent or null.
    pub bos_token_id: u32,
    pub chunk_size_feed_forward: usize,
    pub cross_attention_hidden_size: Option<usize>,
    pub decoder_start_token_id: Option<u32>,
    pub diversity_penalty: f64,
    pub do_sample: bool,
    /// Optional dtype name as a plain string; interpretation is left to the consumer.
    pub dtype: Option<String>,
    pub early_stopping: bool,
    pub embd_pdrop: f64,
    pub encoder_no_repeat_ngram_size: usize,
    /// Required token id; a single `u32`, so a list of ids would not deserialize.
    pub eos_token_id: u32,
    pub exponential_decay_length_penalty: Option<f64>,
    pub finetuning_task: Option<String>,
    pub forced_bos_token_id: Option<u32>,
    pub forced_eos_token_id: Option<u32>,
    /// Id-to-label table keyed by integer id. Required (an empty map is fine).
    /// NOTE(review): with a JSON source the object keys arrive as strings and
    /// must parse as `usize` — confirm the input format.
    pub id2label: std::collections::HashMap<usize, String>,
    pub initializer_range: f64,
    pub is_decoder: bool,
    pub is_encoder_decoder: bool,
    /// Label-to-id table. Presumably the inverse of `id2label`; nothing in this
    /// declaration enforces that.
    pub label2id: std::collections::HashMap<String, usize>,
    pub layer_norm_epsilon: f64,
    pub length_penalty: f64,
    pub max_length: usize,
    pub min_length: usize,
    pub model_type: String,
    /// NOTE(review): `n_ctx`, `n_embd`, `n_head`, `n_layer` and `n_positions`
    /// are presumably the usual GPT-2 size hyper-parameters (context length,
    /// embedding width, head count, layer count, position count) — confirm
    /// against the model code that consumes them.
    pub n_ctx: usize,
    pub n_embd: usize,
    pub n_head: usize,
    /// NOTE(review): required and non-null here. Upstream Hugging Face GPT-2
    /// configs may store `n_inner` as `null`, which would fail to deserialize
    /// into `usize` — confirm the target `config.json` always holds a number.
    pub n_inner: usize,
    pub n_layer: usize,
    pub n_positions: usize,
    pub no_repeat_ngram_size: usize,
    pub num_beam_groups: usize,
    pub num_beams: usize,
    pub num_return_sequences: usize,
    pub output_attentions: bool,
    pub output_hidden_states: bool,
    pub output_scores: bool,
    /// Required token id; cannot be absent or null.
    pub pad_token_id: u32,
    /// Position-embedding variant name as a free-form string; the accepted
    /// values are defined by whoever reads this field, not by this struct.
    pub position_embedding_type: String,
    pub prefix: Option<String>,
    pub problem_type: Option<String>,
    pub remove_invalid_values: bool,
    pub reorder_and_upcast_attn: bool,
    pub repetition_penalty: f64,
    pub resid_pdrop: f64,
    pub return_dict: bool,
    pub return_dict_in_generate: bool,
    /// NOTE(review): presumably the rotary position embedding base — confirm
    /// where it is consumed.
    pub rope_base: f64,
    pub scale_attn_by_inverse_layer_idx: bool,
    pub scale_attn_weights: bool,
    pub sep_token_id: Option<u32>,
    pub summary_activation: Option<String>,
    pub summary_first_dropout: f64,
    pub summary_proj_to_labels: bool,
    pub summary_type: String,
    pub summary_use_proj: bool,
    pub suppress_tokens: Option<Vec<u32>>,
    /// Untyped payload kept as a raw `serde_json::Value`, so any JSON shape is
    /// accepted without further validation.
    pub task_specific_params: Option<serde_json::Value>,
    pub temperature: f64,
    pub tf_legacy_loss: bool,
    pub tie_encoder_decoder: bool,
    pub tie_word_embeddings: bool,
    pub tokenizer_class: Option<String>,
    pub top_k: usize,
    pub top_p: f64,
    pub torchscript: bool,
    pub typical_p: f64,
    pub use_bfloat16: bool,
    pub use_cache: bool,
    /// Required vocabulary size as a `usize`.
    pub vocab_size: usize,
}