burn_nn/activation/swiglu.rs

use burn_core as burn;

use burn::config::Config;
use burn::module::{Content, DisplaySettings, Initializer, Module, ModuleDisplay};
use burn::tensor::activation::silu;
use burn::tensor::{Tensor, backend::Backend};

use crate::{Linear, LinearConfig, LinearLayout};

/// Configuration to create a [SwiGlu] activation layer using the [init function](SwiGluConfig::init).
#[derive(Config, Debug)]
pub struct SwiGluConfig {
    /// The size of the input features.
    pub d_input: usize,
    /// The size of the output features.
    pub d_output: usize,
    /// If a bias should be applied during the linear transformations.
    #[config(default = false)]
    pub bias: bool,
    /// The initializer used for the parameters of the two linear layers.
    /// Defaults to Kaiming uniform with `gain = 1 / sqrt(3)`.
    #[config(
        default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0), fan_out_only:false}"
    )]
    pub initializer: Initializer,
    /// The memory layout of the linear layers' weight matrices.
    #[config(default = "LinearLayout::Row")]
    pub layout: LinearLayout,
}
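
// Construction sketch: `#[derive(Config)]` generates `new` for the required
// fields plus a `with_*` builder for each defaulted field, so every default
// above can be overridden (the sizes here are illustrative):
//
//     let config = SwiGluConfig::new(256, 1024)
//         .with_bias(true)
//         .with_initializer(Initializer::Constant { value: 0.5 });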

/// Applies the SwiGLU (Swish Gated Linear Unit) activation, introduced in
/// [GLU Variants Improve Transformer](https://arxiv.org/abs/2002.05202):
///
/// `SwiGLU(x) = SiLU(x · W_inner + b_inner) ⊙ (x · W_outer + b_outer)`
///
/// Should be created with [SwiGluConfig].
#[derive(Module, Debug)]
#[module(custom_display)]
pub struct SwiGlu<B: Backend> {
    /// The inner linear projection, whose output is passed through SiLU.
    pub linear_inner: Linear<B>,
    /// The outer linear projection, multiplied element-wise with the gated inner branch.
    pub linear_outer: Linear<B>,
}

impl<B: Backend> ModuleDisplay for SwiGlu<B> {
    fn custom_settings(&self) -> Option<DisplaySettings> {
        DisplaySettings::new()
            .with_new_line_after_attribute(false)
            .optional()
    }

    fn custom_content(&self, content: Content) -> Option<Content> {
        let [d_input, d_output] = self.linear_inner.weight.shape().dims();
        content
            .add("d_input", &d_input)
            .add("d_output", &d_output)
            .add("bias", &self.linear_inner.bias.is_some())
            .optional()
    }
}

impl SwiGluConfig {
    /// Initialize a new [SwiGlu] activation layer.
    pub fn init<B: Backend>(&self, device: &B::Device) -> SwiGlu<B> {
        SwiGlu {
            linear_inner: LinearConfig::new(self.d_input, self.d_output)
                .with_bias(self.bias)
                .with_initializer(self.initializer.clone())
                .with_layout(self.layout)
                .init(device),
            linear_outer: LinearConfig::new(self.d_input, self.d_output)
                .with_bias(self.bias)
                .with_initializer(self.initializer.clone())
                .with_layout(self.layout)
                .init(device),
        }
    }
}
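
// Note: both projections share the same `[d_input, d_output]` shape, so the
// layer holds `2 * d_input * d_output` weight parameters, plus `2 * d_output`
// bias terms when `bias` is true (the `display` test below reports
// `params: 30` for `d_input = 3`, `d_output = 5`, no bias).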

impl<B: Backend> SwiGlu<B> {
    /// Applies the forward pass on the input tensor.
    ///
    /// # Shapes
    ///
    /// - input: `[..., d_input]`
    /// - output: `[..., d_output]`
    pub fn forward<const D: usize>(&self, input: Tensor<B, D>) -> Tensor<B, D> {
        // Gate: SiLU of the inner projection, scaled element-wise
        // by the outer projection of the same input.
        let x = self.linear_inner.forward(input.clone());
        let x = silu(x);
        x.mul(self.linear_outer.forward(input))
    }
}
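
// A minimal usage sketch (hypothetical sizes; any backend `B` works). The
// module maps the last dimension from `d_input` to `d_output` and leaves the
// leading dimensions untouched:
//
//     let device = Default::default();
//     let swiglu = SwiGluConfig::new(256, 1024).init::<B>(&device);
//     let input = Tensor::<B, 3>::zeros([8, 16, 256], &device);
//     let output = swiglu.forward(input); // shape: [8, 16, 1024]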

#[cfg(test)]
mod tests {
    use super::*;
    use crate::TestBackend;
    use burn::tensor::{Tolerance, ops::FloatElem};
    type FT = FloatElem<TestBackend>;

    #[test]
    fn test_swiglu_forward_no_bias() {
        let device = Default::default();
        TestBackend::seed(&device, 0);

        let config = SwiGluConfig::new(3, 3).with_initializer(Initializer::Constant { value: 0.5 });
        let swiglu = config.init(&device);
        let input =
            Tensor::<TestBackend, 2>::from_data([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], &device);
        let output = swiglu.forward(input);
        let expected_output = Tensor::<TestBackend, 2>::from_data(
            [[8.5732, 8.5732, 8.5732], [56.2189, 56.2189, 56.2189]],
            &device,
        );
        output
            .to_data()
            .assert_approx_eq::<FT>(&expected_output.to_data(), Tolerance::default());
    }

    #[test]
    fn test_swiglu_forward_with_bias() {
        let device = Default::default();
        TestBackend::seed(&device, 0);

        let config = SwiGluConfig::new(3, 3)
            .with_bias(true)
            .with_initializer(Initializer::Constant { value: 0.5 });
        let swiglu = config.init(&device);
        let input =
            Tensor::<TestBackend, 2>::from_data([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], &device);
        let output = swiglu.forward(input);
        let expected_output = Tensor::<TestBackend, 2>::from_data(
            [[11.8909, 11.8909, 11.8909], [63.9785, 63.9785, 63.9785]],
            &device,
        );
        output
            .to_data()
            .assert_approx_eq::<FT>(&expected_output.to_data(), Tolerance::default());
    }

    #[test]
    fn display() {
        let config = SwiGluConfig::new(3, 5);
        let swiglu = config.init::<TestBackend>(&Default::default());

        assert_eq!(
            alloc::format!("{swiglu}"),
            "SwiGlu {d_input: 3, d_output: 5, bias: false, params: 30}"
        );
    }
}