syntaxdot_transformers/models/squeeze_bert/
config.rs

1use serde::Deserialize;
2
3use crate::activations::Activation;
4use crate::models::bert::BertConfig;
5
6/// SqueezeBert model configuration.
7#[derive(Debug, Deserialize)]
8#[serde(default)]
9pub struct SqueezeBertConfig {
10    pub attention_probs_dropout_prob: f64,
11    pub embedding_size: i64,
12    pub hidden_act: Activation,
13    pub hidden_dropout_prob: f64,
14    pub hidden_size: i64,
15    pub initializer_range: f64,
16    pub intermediate_size: i64,
17    pub layer_norm_eps: f64,
18    pub max_position_embeddings: i64,
19    pub num_attention_heads: i64,
20    pub num_hidden_layers: i64,
21    pub type_vocab_size: i64,
22    pub vocab_size: i64,
23    pub q_groups: i64,
24    pub k_groups: i64,
25    pub v_groups: i64,
26    pub post_attention_groups: i64,
27    pub intermediate_groups: i64,
28    pub output_groups: i64,
29}
30
31impl Default for SqueezeBertConfig {
32    fn default() -> Self {
33        SqueezeBertConfig {
34            attention_probs_dropout_prob: 0.1,
35            embedding_size: 768,
36            hidden_act: Activation::Gelu,
37            hidden_dropout_prob: 0.1,
38            hidden_size: 768,
39            initializer_range: 0.02,
40            intermediate_size: 3072,
41            layer_norm_eps: 1e-12,
42            max_position_embeddings: 512,
43            num_attention_heads: 12,
44            num_hidden_layers: 12,
45            type_vocab_size: 2,
46            vocab_size: 30528,
47            q_groups: 4,
48            k_groups: 4,
49            v_groups: 4,
50            post_attention_groups: 1,
51            intermediate_groups: 4,
52            output_groups: 4,
53        }
54    }
55}
56
57impl From<&SqueezeBertConfig> for BertConfig {
58    fn from(squeeze_bert_config: &SqueezeBertConfig) -> Self {
59        BertConfig {
60            attention_probs_dropout_prob: squeeze_bert_config.attention_probs_dropout_prob,
61            hidden_act: squeeze_bert_config.hidden_act,
62            hidden_dropout_prob: squeeze_bert_config.hidden_dropout_prob,
63            hidden_size: squeeze_bert_config.hidden_size,
64            initializer_range: squeeze_bert_config.initializer_range,
65            intermediate_size: squeeze_bert_config.intermediate_size,
66            layer_norm_eps: squeeze_bert_config.layer_norm_eps,
67            max_position_embeddings: squeeze_bert_config.max_position_embeddings,
68            num_attention_heads: squeeze_bert_config.num_attention_heads,
69            num_hidden_layers: squeeze_bert_config.num_hidden_layers,
70            type_vocab_size: squeeze_bert_config.type_vocab_size,
71            vocab_size: squeeze_bert_config.vocab_size,
72        }
73    }
74}