use serde::Deserialize;
/// Flat configuration record populated through serde's derived `Deserialize`.
///
/// The field names look like a Hugging Face GPT-2 style model/generation
/// config dump — NOTE(review): confirm against whatever produces the input.
///
/// Deserialization contract, as established by the derive and the absence of
/// any `#[serde(...)]` attributes:
///
/// * Input keys must match the Rust field names exactly (no renames).
/// * Every field that is not an `Option` is required; a missing key is a
///   deserialization error because no defaults are declared.
/// * `Option` fields become `None` when the key is absent or null.
/// * Unknown keys are ignored (`deny_unknown_fields` is not set).
///
/// Field order is kept as-is because it determines the derived `Debug`
/// output and positional (sequence) deserialization.
#[derive(Debug, Clone, Deserialize)]
pub struct GPT2Config {
    pub activation_function: String,
    pub add_cross_attention: bool,
    pub attn_pdrop: f64,
    // NOTE(review): modelled as a flat list of ids; if the producer emits a
    // list of lists for this key, deserialization will fail — confirm.
    pub bad_words_ids: Option<Vec<u32>>,
    pub begin_suppress_tokens: Option<Vec<u32>>,
    pub bos_token_id: u32,
    pub chunk_size_feed_forward: usize,
    pub cross_attention_hidden_size: Option<usize>,
    pub decoder_start_token_id: Option<u32>,
    pub diversity_penalty: f64,
    pub do_sample: bool,
    pub dtype: Option<String>,
    pub early_stopping: bool,
    pub embd_pdrop: f64,
    pub encoder_no_repeat_ngram_size: usize,
    pub eos_token_id: u32,
    pub exponential_decay_length_penalty: Option<f64>,
    pub finetuning_task: Option<String>,
    pub forced_bos_token_id: Option<u32>,
    pub forced_eos_token_id: Option<u32>,
    /// Map keyed by integer id. When read through `serde_json`, the JSON
    /// object keys (always strings) are parsed into `usize`.
    pub id2label: std::collections::HashMap<usize, String>,
    pub initializer_range: f64,
    pub is_decoder: bool,
    pub is_encoder_decoder: bool,
    /// Map from label text to integer id; presumably the inverse of
    /// `id2label` — nothing here enforces that.
    pub label2id: std::collections::HashMap<String, usize>,
    pub layer_norm_epsilon: f64,
    pub length_penalty: f64,
    pub max_length: usize,
    pub min_length: usize,
    pub model_type: String,
    pub n_ctx: usize,
    pub n_embd: usize,
    pub n_head: usize,
    // NOTE(review): required and non-nullable here; a null or missing
    // `n_inner` in the input is a deserialization error — confirm the
    // producer always writes a number.
    pub n_inner: usize,
    pub n_layer: usize,
    pub n_positions: usize,
    pub no_repeat_ngram_size: usize,
    pub num_beam_groups: usize,
    pub num_beams: usize,
    pub num_return_sequences: usize,
    pub output_attentions: bool,
    pub output_hidden_states: bool,
    pub output_scores: bool,
    pub pad_token_id: u32,
    pub position_embedding_type: String,
    pub prefix: Option<String>,
    pub problem_type: Option<String>,
    pub remove_invalid_values: bool,
    pub reorder_and_upcast_attn: bool,
    pub repetition_penalty: f64,
    pub resid_pdrop: f64,
    pub return_dict: bool,
    pub return_dict_in_generate: bool,
    pub rope_base: f64,
    pub scale_attn_by_inverse_layer_idx: bool,
    pub scale_attn_weights: bool,
    pub sep_token_id: Option<u32>,
    pub summary_activation: Option<String>,
    pub summary_first_dropout: f64,
    pub summary_proj_to_labels: bool,
    pub summary_type: String,
    pub summary_use_proj: bool,
    pub suppress_tokens: Option<Vec<u32>>,
    /// Kept as an untyped JSON value; its schema is not modelled here.
    pub task_specific_params: Option<serde_json::Value>,
    pub temperature: f64,
    pub tf_legacy_loss: bool,
    pub tie_encoder_decoder: bool,
    pub tie_word_embeddings: bool,
    pub tokenizer_class: Option<String>,
    pub top_k: usize,
    pub top_p: f64,
    pub torchscript: bool,
    pub typical_p: f64,
    pub use_bfloat16: bool,
    pub use_cache: bool,
    pub vocab_size: usize,
}