1use crate as burn;
2
3use crate::config::Config;
4use crate::module::{Content, DisplaySettings, Module, ModuleDisplay};
5use crate::tensor::activation::silu;
6use crate::tensor::{backend::Backend, Tensor};
7
8use super::{Initializer, Linear, LinearConfig};
9
10#[derive(Config, Debug)]
12pub struct SwiGluConfig {
13 pub d_input: usize,
15 pub d_output: usize,
17 #[config(default = false)]
20 pub bias: bool,
21 #[config(
23 default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0), fan_out_only:false}"
24 )]
25 pub initializer: Initializer,
26}
27
28#[derive(Module, Debug)]
34#[module(custom_display)]
35pub struct SwiGlu<B: Backend> {
36 pub linear_inner: Linear<B>,
39 pub linear_outer: Linear<B>,
42}
43
44impl<B: Backend> ModuleDisplay for SwiGlu<B> {
45 fn custom_settings(&self) -> Option<DisplaySettings> {
46 DisplaySettings::new()
47 .with_new_line_after_attribute(false)
48 .optional()
49 }
50
51 fn custom_content(&self, content: Content) -> Option<Content> {
52 let [d_input, d_output] = self.linear_inner.weight.shape().dims();
53 content
54 .add("d_input", &d_input)
55 .add("d_output", &d_output)
56 .add("bias", &self.linear_inner.bias.is_some())
57 .optional()
58 }
59}
60
61impl SwiGluConfig {
62 pub fn init<B: Backend>(&self, device: &B::Device) -> SwiGlu<B> {
64 SwiGlu {
65 linear_inner: LinearConfig::new(self.d_input, self.d_output)
66 .with_bias(self.bias)
67 .with_initializer(self.initializer.clone())
68 .init(device),
69 linear_outer: LinearConfig::new(self.d_input, self.d_output)
70 .with_bias(self.bias)
71 .with_initializer(self.initializer.clone())
72 .init(device),
73 }
74 }
75}
76
77impl<B: Backend> SwiGlu<B> {
78 pub fn forward<const D: usize>(&self, input: Tensor<B, D>) -> Tensor<B, D> {
85 let x = self.linear_inner.forward(input.clone());
86 let x = silu(x);
87 x.mul(self.linear_outer.forward(input))
88 }
89}
90
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TestBackend;

    /// Builds the layer described by `config` on `device` and runs it on a fixed 2x3 batch.
    fn forward_fixed_batch(
        config: &SwiGluConfig,
        device: &<TestBackend as Backend>::Device,
    ) -> Tensor<TestBackend, 2> {
        let layer: SwiGlu<TestBackend> = config.init(device);
        let batch =
            Tensor::<TestBackend, 2>::from_data([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], device);
        layer.forward(batch)
    }

    /// With every weight fixed at 0.5 and no bias, each output row holds one repeated value.
    #[test]
    fn test_swiglu_forward_no_bias() {
        TestBackend::seed(0);
        let device = Default::default();
        let config = SwiGluConfig::new(3, 3).with_initializer(Initializer::Constant { value: 0.5 });

        let actual = forward_fixed_batch(&config, &device);

        let expected = Tensor::<TestBackend, 2>::from_data(
            [[8.5732, 8.5732, 8.5732], [56.2189, 56.2189, 56.2189]],
            &device,
        );
        actual.to_data().assert_approx_eq(&expected.to_data(), 4);
    }

    /// Same setup as the no-bias case, but the constant initializer also fills the biases.
    #[test]
    fn test_swiglu_forward_with_bias() {
        TestBackend::seed(0);
        let device = Default::default();
        let config = SwiGluConfig::new(3, 3)
            .with_bias(true)
            .with_initializer(Initializer::Constant { value: 0.5 });

        let actual = forward_fixed_batch(&config, &device);

        let expected = Tensor::<TestBackend, 2>::from_data(
            [[11.8909, 11.8909, 11.8909], [63.9785, 63.9785, 63.9785]],
            &device,
        );
        actual.to_data().assert_approx_eq(&expected.to_data(), 4);
    }

    /// The custom display prints the sizes, the bias flag and the parameter count on one line.
    #[test]
    fn display() {
        let device = Default::default();
        let layer = SwiGluConfig::new(3, 5).init::<TestBackend>(&device);
        let rendered = alloc::format!("{}", layer);

        assert_eq!(
            rendered,
            "SwiGlu {d_input: 3, d_output: 5, bias: false, params: 30}"
        );
    }
}