// syntaxdot_transformers/models/bert/config.rs

1use serde::Deserialize;
2
3use crate::activations::Activation;
4use crate::models::traits::WordEmbeddingsConfig;
5
6/// Bert model configuration.
7#[derive(Clone, Debug, Deserialize)]
8#[serde(default)]
9pub struct BertConfig {
10    pub attention_probs_dropout_prob: f64,
11    pub hidden_act: Activation,
12    pub hidden_dropout_prob: f64,
13    pub hidden_size: i64,
14    pub initializer_range: f64,
15    pub intermediate_size: i64,
16    pub layer_norm_eps: f64,
17    pub max_position_embeddings: i64,
18    pub num_attention_heads: i64,
19    pub num_hidden_layers: i64,
20    pub type_vocab_size: i64,
21    pub vocab_size: i64,
22}
23
24impl Default for BertConfig {
25    fn default() -> Self {
26        BertConfig {
27            attention_probs_dropout_prob: 0.1,
28            hidden_act: Activation::Gelu,
29            hidden_dropout_prob: 0.1,
30            hidden_size: 768,
31            initializer_range: 0.02,
32            intermediate_size: 3072,
33            layer_norm_eps: 1e-12,
34            max_position_embeddings: 512,
35            num_attention_heads: 12,
36            num_hidden_layers: 12,
37            type_vocab_size: 2,
38            vocab_size: 30000,
39        }
40    }
41}
42
43impl WordEmbeddingsConfig for BertConfig {
44    fn dims(&self) -> i64 {
45        self.hidden_size
46    }
47
48    fn dropout(&self) -> f64 {
49        self.hidden_dropout_prob
50    }
51
52    fn initializer_range(&self) -> f64 {
53        self.initializer_range
54    }
55
56    fn layer_norm_eps(&self) -> f64 {
57        self.layer_norm_eps
58    }
59
60    fn vocab_size(&self) -> i64 {
61        self.vocab_size
62    }
63}