rust_bert/common/activations.rs
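
//! Activation functions used by the language models, together with an
//! `Activation` enum that maps configuration values to function pointers.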

use serde::{Deserialize, Serialize};
use std::f64::consts::PI;
use tch::Tensor;

/// Exact Gaussian Error Linear Unit: `0.5 * x * (1 + erf(x / sqrt(2)))`.
pub fn _gelu(x: &Tensor) -> Tensor {
    x * 0.5 * (1.0 + (x / ((2.0_f64).sqrt())).erf())
}

/// Rectified Linear Unit: `max(0, x)`.
pub fn _relu(x: &Tensor) -> Tensor {
    x.relu()
}

/// Swish (also known as SiLU): `x * sigmoid(x)`.
pub fn _swish(x: &Tensor) -> Tensor {
    x * x.sigmoid()
}

/// Mish: `x * tanh(softplus(x))`.
pub fn _mish(x: &Tensor) -> Tensor {
    x * (x.softplus().tanh())
}

/// Tanh approximation of the GELU: `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))`.
pub fn _gelu_new(x: &Tensor) -> Tensor {
    x * 0.5 * (((x.pow_tensor_scalar(3.0f64) * 0.044715 + x) * ((2f64 / PI).sqrt())).tanh() + 1)
}

/// Hyperbolic tangent.
pub fn _tanh(x: &Tensor) -> Tensor {
    x.tanh()
}

/// Identity: returns a shallow clone sharing the same underlying storage.
pub fn _identity(x: &Tensor) -> Tensor {
    x.shallow_clone()
}

/// Wrapper around a pointer to an activation function, allowing activations to be
/// stored and passed around as values.
pub struct TensorFunction(Box<fn(&Tensor) -> Tensor>);

impl TensorFunction {
    pub fn new(fun: Box<fn(&Tensor) -> Tensor>) -> Self {
        Self(fun)
    }

    /// Returns a reference to the wrapped function pointer.
    pub fn get_fn(&self) -> &fn(&Tensor) -> Tensor {
        &self.0
    }
}

// Function pointers have no meaningful `Debug` output, so a fixed name is printed.
impl std::fmt::Debug for TensorFunction {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        write!(f, "TensorFunction")
    }
}
#[allow(non_camel_case_types)]
#[derive(Clone, Debug, Serialize, Deserialize, Copy)]
/// # Activation function used in the attention layer and masked language model head
pub enum Activation {
    /// Gaussian Error Linear Unit ([Hendrycks et al., 2016](https://arxiv.org/abs/1606.08415))
    gelu,
    /// Rectified Linear Unit
    relu,
    /// Swish ([Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941))
    swish,
    /// Mish ([Misra, 2019](https://arxiv.org/abs/1908.08681))
    mish,
    /// Gaussian Error Linear Unit, tanh approximation ([Hendrycks et al., 2016](https://arxiv.org/abs/1606.08415))
    gelu_new,
    /// Tanh
    tanh,
    /// Identity
    identity,
}

impl Activation {
    /// Returns the matching activation function, wrapped in a `TensorFunction`.
    pub fn get_function(&self) -> TensorFunction {
        TensorFunction::new(Box::new(match self {
            Activation::gelu => _gelu,
            Activation::relu => _relu,
            Activation::swish => _swish,
            Activation::gelu_new => _gelu_new,
            Activation::mish => _mish,
            Activation::tanh => _tanh,
            Activation::identity => _identity,
        }))
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    #[ignore] // compile-time check: building the coercion to `Box<dyn Send>` proves `TensorFunction` is `Send`
    fn tensorfunction_send() {
        let _: Box<dyn Send> = Box::new(Activation::gelu.get_function());
    }
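
    // Usage sketch: exercises an activation end-to-end through the `Activation`
    // enum. Assumptions: a libtorch runtime is available at test time (hence
    // `#[ignore]`, as above) and the `tch` version in use provides
    // `Tensor::from_slice` (older releases expose `Tensor::of_slice` instead).
    #[test]
    #[ignore]
    fn activation_function_values() {
        let x = Tensor::from_slice(&[-1.0f64, 0.0, 2.0]);

        // ReLU clamps negative inputs to zero and keeps positive inputs unchanged.
        let relu = Activation::relu.get_function();
        let y = (relu.get_fn())(&x);
        assert_eq!(y.double_value(&[0]), 0.0);
        assert_eq!(y.double_value(&[2]), 2.0);

        // The identity activation returns the input values untouched.
        let identity = Activation::identity.get_function();
        let z = (identity.get_fn())(&x);
        assert_eq!(z.double_value(&[0]), -1.0);
    }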
}