// syntaxdot_transformers/models/albert/config.rs
use serde::Deserialize;

use crate::activations::Activation;
use crate::models::bert::BertConfig;
use crate::models::traits::WordEmbeddingsConfig;
6
7#[derive(Debug, Deserialize)]
9#[serde(default)]
10pub struct AlbertConfig {
11 pub attention_probs_dropout_prob: f64,
12 pub embedding_size: i64,
13 pub hidden_act: Activation,
14 pub hidden_dropout_prob: f64,
15 pub hidden_size: i64,
16 pub initializer_range: f64,
17 pub inner_group_num: i64,
18 pub intermediate_size: i64,
19 pub max_position_embeddings: i64,
20 pub num_attention_heads: i64,
21 pub num_hidden_groups: i64,
22 pub num_hidden_layers: i64,
23 pub type_vocab_size: i64,
24 pub vocab_size: i64,
25}
26
27impl Default for AlbertConfig {
28 fn default() -> Self {
29 AlbertConfig {
30 attention_probs_dropout_prob: 0.,
31 embedding_size: 128,
32 hidden_act: Activation::GeluNew,
33 hidden_dropout_prob: 0.,
34 hidden_size: 768,
35 initializer_range: 0.02,
36 inner_group_num: 1,
37 intermediate_size: 3072,
38 max_position_embeddings: 512,
39 num_attention_heads: 12,
40 num_hidden_groups: 1,
41 num_hidden_layers: 12,
42 type_vocab_size: 2,
43 vocab_size: 30000,
44 }
45 }
46}
47
48impl From<&AlbertConfig> for BertConfig {
49 fn from(albert_config: &AlbertConfig) -> Self {
50 BertConfig {
51 attention_probs_dropout_prob: albert_config.attention_probs_dropout_prob,
52 hidden_act: albert_config.hidden_act,
53 hidden_dropout_prob: albert_config.hidden_dropout_prob,
54 hidden_size: albert_config.hidden_size,
55 initializer_range: albert_config.initializer_range,
56 intermediate_size: albert_config.intermediate_size,
57 layer_norm_eps: 1e-12,
58 max_position_embeddings: albert_config.max_position_embeddings,
59 num_attention_heads: albert_config.num_attention_heads,
60 num_hidden_layers: albert_config.num_hidden_layers,
61 type_vocab_size: albert_config.type_vocab_size,
62 vocab_size: albert_config.vocab_size,
63 }
64 }
65}
66
67impl WordEmbeddingsConfig for AlbertConfig {
68 fn dims(&self) -> i64 {
69 self.embedding_size
70 }
71
72 fn dropout(&self) -> f64 {
73 self.hidden_dropout_prob
74 }
75
76 fn initializer_range(&self) -> f64 {
77 self.initializer_range
78 }
79
80 fn layer_norm_eps(&self) -> f64 {
81 1e-12
82 }
83
84 fn vocab_size(&self) -> i64 {
85 self.vocab_size
86 }
87}