syntaxdot_transformers/models/squeeze_bert/
config.rs1use serde::Deserialize;
2
3use crate::activations::Activation;
4use crate::models::bert::BertConfig;
5
6#[derive(Debug, Deserialize)]
8#[serde(default)]
9pub struct SqueezeBertConfig {
10 pub attention_probs_dropout_prob: f64,
11 pub embedding_size: i64,
12 pub hidden_act: Activation,
13 pub hidden_dropout_prob: f64,
14 pub hidden_size: i64,
15 pub initializer_range: f64,
16 pub intermediate_size: i64,
17 pub layer_norm_eps: f64,
18 pub max_position_embeddings: i64,
19 pub num_attention_heads: i64,
20 pub num_hidden_layers: i64,
21 pub type_vocab_size: i64,
22 pub vocab_size: i64,
23 pub q_groups: i64,
24 pub k_groups: i64,
25 pub v_groups: i64,
26 pub post_attention_groups: i64,
27 pub intermediate_groups: i64,
28 pub output_groups: i64,
29}
30
31impl Default for SqueezeBertConfig {
32 fn default() -> Self {
33 SqueezeBertConfig {
34 attention_probs_dropout_prob: 0.1,
35 embedding_size: 768,
36 hidden_act: Activation::Gelu,
37 hidden_dropout_prob: 0.1,
38 hidden_size: 768,
39 initializer_range: 0.02,
40 intermediate_size: 3072,
41 layer_norm_eps: 1e-12,
42 max_position_embeddings: 512,
43 num_attention_heads: 12,
44 num_hidden_layers: 12,
45 type_vocab_size: 2,
46 vocab_size: 30528,
47 q_groups: 4,
48 k_groups: 4,
49 v_groups: 4,
50 post_attention_groups: 1,
51 intermediate_groups: 4,
52 output_groups: 4,
53 }
54 }
55}
56
57impl From<&SqueezeBertConfig> for BertConfig {
58 fn from(squeeze_bert_config: &SqueezeBertConfig) -> Self {
59 BertConfig {
60 attention_probs_dropout_prob: squeeze_bert_config.attention_probs_dropout_prob,
61 hidden_act: squeeze_bert_config.hidden_act,
62 hidden_dropout_prob: squeeze_bert_config.hidden_dropout_prob,
63 hidden_size: squeeze_bert_config.hidden_size,
64 initializer_range: squeeze_bert_config.initializer_range,
65 intermediate_size: squeeze_bert_config.intermediate_size,
66 layer_norm_eps: squeeze_bert_config.layer_norm_eps,
67 max_position_embeddings: squeeze_bert_config.max_position_embeddings,
68 num_attention_heads: squeeze_bert_config.num_attention_heads,
69 num_hidden_layers: squeeze_bert_config.num_hidden_layers,
70 type_vocab_size: squeeze_bert_config.type_vocab_size,
71 vocab_size: squeeze_bert_config.vocab_size,
72 }
73 }
74}