// syntaxdot_transformers/models/bert/config.rs
use serde::Deserialize;
2
3use crate::activations::Activation;
4use crate::models::traits::WordEmbeddingsConfig;
5
6#[derive(Clone, Debug, Deserialize)]
8#[serde(default)]
9pub struct BertConfig {
10 pub attention_probs_dropout_prob: f64,
11 pub hidden_act: Activation,
12 pub hidden_dropout_prob: f64,
13 pub hidden_size: i64,
14 pub initializer_range: f64,
15 pub intermediate_size: i64,
16 pub layer_norm_eps: f64,
17 pub max_position_embeddings: i64,
18 pub num_attention_heads: i64,
19 pub num_hidden_layers: i64,
20 pub type_vocab_size: i64,
21 pub vocab_size: i64,
22}
23
24impl Default for BertConfig {
25 fn default() -> Self {
26 BertConfig {
27 attention_probs_dropout_prob: 0.1,
28 hidden_act: Activation::Gelu,
29 hidden_dropout_prob: 0.1,
30 hidden_size: 768,
31 initializer_range: 0.02,
32 intermediate_size: 3072,
33 layer_norm_eps: 1e-12,
34 max_position_embeddings: 512,
35 num_attention_heads: 12,
36 num_hidden_layers: 12,
37 type_vocab_size: 2,
38 vocab_size: 30000,
39 }
40 }
41}
42
43impl WordEmbeddingsConfig for BertConfig {
44 fn dims(&self) -> i64 {
45 self.hidden_size
46 }
47
48 fn dropout(&self) -> f64 {
49 self.hidden_dropout_prob
50 }
51
52 fn initializer_range(&self) -> f64 {
53 self.initializer_range
54 }
55
56 fn layer_norm_eps(&self) -> f64 {
57 self.layer_norm_eps
58 }
59
60 fn vocab_size(&self) -> i64 {
61 self.vocab_size
62 }
63}