// ferrite/autograd/grad_fn/activation.rs

1use crate::{reduce_grad, tensor::*};
2use super::super::grad::*;
3
4
5#[derive(Debug)]
6pub struct BinaryStepGrad {
7  lhs: Tensor,
8  output: Tensor,
9}
10
11impl BinaryStepGrad {
12  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
13    BinaryStepGrad {
14      lhs: lhs.clone(),
15      output: output.clone(),
16    }
17  }
18}
19
20impl GradientFunction for BinaryStepGrad {
21  fn backward(&self) {
22    if let Some(lhs_grad) = &self.lhs.grad() {
23      let zeros = Storage::zeros(self.lhs.tensor().shape().to_vec(), Some(self.lhs.device()), None);    
24      lhs_grad.borrow_mut().add_tensor_assign(&zeros);
25    }
26  }
27
28  fn prev(&self) -> Vec<&Tensor> {
29    vec![&self.lhs]
30  }
31}
32
33
34
35#[derive(Debug)]
36pub struct SigmoidGrad {
37  lhs: Tensor,
38  output: Tensor,
39}
40
41impl SigmoidGrad {
42  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
43    SigmoidGrad {
44      lhs: lhs.clone(),
45      output: output.clone(),
46    }
47  }
48}
49
50impl GradientFunction for SigmoidGrad {
51  fn backward(&self) {
52    // Get output gradient
53    let out_grad = self.output.grad().unwrap();
54    let out_grad = out_grad.borrow();
55
56    // Propagate to lhs
57    if let Some(lhs_grad) = &self.lhs.grad() {
58      let sigmoid_op = |x: f32| 1./(1. + f32::exp(-x));
59      let grad_for_lhs = &*out_grad * &self.lhs.storage.apply(|x| sigmoid_op(x) * (1. - sigmoid_op(x)));
60
61      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
62    
63      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
64    }
65  }
66
67  fn prev(&self) -> Vec<&Tensor> {
68    vec![&self.lhs]
69  }
70}
71
72
73
74#[derive(Debug)]
75pub struct TanhGrad {
76  lhs: Tensor,
77  output: Tensor,
78}
79
80impl TanhGrad {
81  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
82    TanhGrad {
83      lhs: lhs.clone(),
84      output: output.clone(),
85    }
86  }
87}
88
89impl GradientFunction for TanhGrad {
90  fn backward(&self) {
91    // Get output gradient
92    let out_grad = self.output.grad().unwrap();
93    let out_grad = out_grad.borrow();
94
95    // Propagate to lhs
96    if let Some(lhs_grad) = &self.lhs.grad() {
97      let tanh_op = |x: f32| (f32::exp(x) - f32::exp(-x))/(f32::exp(x) + f32::exp(-x));
98      let grad_for_lhs = &*out_grad * &self.lhs.storage.apply(|x| 1. - tanh_op(x));
99
100      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
101    
102      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
103    }
104  }
105
106  fn prev(&self) -> Vec<&Tensor> {
107    vec![&self.lhs]
108  }
109}
110
111
112#[derive(Debug)]
113pub struct ReluGrad {
114  lhs: Tensor,
115  output: Tensor,
116}
117
118impl ReluGrad {
119  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
120    ReluGrad {
121      lhs: lhs.clone(),
122      output: output.clone(),
123    }
124  }
125}
126
127impl GradientFunction for ReluGrad {
128  fn backward(&self) {
129    // Get output gradient
130    let out_grad = self.output.grad().unwrap();
131    let out_grad = out_grad.borrow();
132
133    // Propagate to lhs
134    if let Some(lhs_grad) = &self.lhs.grad() {
135      let grad_for_lhs = &*out_grad * &self.lhs.storage.apply(|x| if x <= 0. {0.} else {1.});
136      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
137    
138      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
139    }
140  }
141
142  fn prev(&self) -> Vec<&Tensor> {
143    vec![&self.lhs]
144  }
145}
146
147
148
149#[derive(Debug)]
150pub struct LeakyReluGrad {
151  lhs: Tensor,
152  output: Tensor,
153}
154
155impl LeakyReluGrad {
156  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
157    LeakyReluGrad {
158      lhs: lhs.clone(),
159      output: output.clone(),
160    }
161  }
162}
163
164impl GradientFunction for LeakyReluGrad {
165  fn backward(&self) {
166    // Get output gradient
167    let out_grad = self.output.grad().unwrap();
168    let out_grad = out_grad.borrow();
169
170    // Propagate to lhs
171    if let Some(lhs_grad) = &self.lhs.grad() {
172      let grad_for_lhs = &*out_grad * &self.lhs.storage.apply(|x| if x <= 0. {0.1} else {1.});
173      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
174    
175      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
176    }
177  }
178
179  fn prev(&self) -> Vec<&Tensor> {
180    vec![&self.lhs]
181  }
182}
183
184
185#[derive(Debug)]
186pub struct ParametricReluGrad {
187  lhs: Tensor,
188  a: f32,
189  output: Tensor,
190}
191
192impl ParametricReluGrad {
193  pub fn new(lhs: &Tensor, a: f32, output: &Tensor) -> Self {
194    ParametricReluGrad {
195      lhs: lhs.clone(),
196      a: a,
197      output: output.clone(),
198    }
199  }
200}
201
202impl GradientFunction for ParametricReluGrad {
203  fn backward(&self) {
204    // Get output gradient
205    let out_grad = self.output.grad().unwrap();
206    let out_grad = out_grad.borrow();
207
208    // Propagate to lhs
209    if let Some(lhs_grad) = &self.lhs.grad() {
210      let grad_for_lhs = &*out_grad * &self.lhs.storage.apply(|x| if x <= 0. {self.a} else {1.});
211      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
212    
213      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
214    }
215  }
216
217  fn prev(&self) -> Vec<&Tensor> {
218    vec![&self.lhs]
219  }
220}
221
222
223
224
225#[derive(Debug)]
226pub struct EluGrad {
227  lhs: Tensor,
228  alpha: f32,
229  output: Tensor,
230}
231
232impl EluGrad {
233  pub fn new(lhs: &Tensor, alpha: f32, output: &Tensor) -> Self {
234    EluGrad {
235      lhs: lhs.clone(),
236      alpha: alpha,
237      output: output.clone(),
238    }
239  }
240}
241
242impl GradientFunction for EluGrad {
243  fn backward(&self) {
244    // Get output gradient
245    let out_grad = self.output.grad().unwrap();
246    let out_grad = out_grad.borrow();
247
248    // Propagate to lhs
249    if let Some(lhs_grad) = &self.lhs.grad() {
250      let grad_for_lhs = &*out_grad * &self.lhs.storage.apply(|x| if x <= 0. {self.alpha * f32::exp(x)} else {1.});
251      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
252    
253      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
254    }
255  }
256
257  fn prev(&self) -> Vec<&Tensor> {
258    vec![&self.lhs]
259  }
260}
261
262
263
264
265#[derive(Debug)]
266pub struct SoftmaxGrad {
267  lhs: Tensor,
268  output: Tensor,
269}
270
271impl SoftmaxGrad {
272  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
273    SoftmaxGrad {
274      lhs: lhs.clone(),
275      output: output.clone(),
276    }
277  }
278}
279
impl GradientFunction for SoftmaxGrad {
  // Backward for softmax via the standard simplification
  //   dL/dx = s * (dL/ds - sum_j(s_j * (dL/ds)_j))
  // which avoids materialising the full Jacobian.
  fn backward(&self) {
    // Get output gradient
    let out_grad = self.output.grad().unwrap();
    let out_grad = out_grad.borrow();

    // Propagate to lhs
    if let Some(lhs_grad) = &self.lhs.grad() {
      // Softmax output from the forward pass: s = softmax(x).
      let s = self.output.tensor();

      // Elementwise product s * dL/ds; only its reduction is used below.
      let grad_times_s = &*out_grad * s;

      // Assumes softmax was taken over the LAST axis — TODO confirm the
      // forward op always reduces over the trailing dimension.
      let axis = s.shape().len() - 1;
      
      // Per-sample inner product sum_j(s_j * (dL/ds)_j) along that axis.
      // NOTE(review): the subtraction below relies on this reduced tensor
      // broadcasting back against out_grad's shape — verify that sum_axis
      // keeps (or the `-` operator restores) the reduced dimension.
      let sum_along_axis = grad_times_s.sum_axis(axis);
      
      // Efficient gradient for softmax:
      // dL/dx = s * (dL/ds - sum(s * dL/ds))
      let grad_for_lhs = s * &(&*out_grad - &sum_along_axis);

      // Collapse any broadcasted dims back to the input's shape.
      let reduced_grad = reduce_grad!(grad_for_lhs, self.lhs.tensor().shape());
      
      // Accumulate into the input's gradient buffer.
      lhs_grad.borrow_mut().add_tensor_assign(&reduced_grad);
    }
  }

  // Nodes feeding this op in the autograd graph.
  fn prev(&self) -> Vec<&Tensor> {
    vec![&self.lhs]
  }
}
318
319
320
321#[derive(Debug)]
322pub struct SwishGrad {
323  lhs: Tensor,
324  output: Tensor,
325}
326
327impl SwishGrad {
328  pub fn new(lhs: &Tensor, output: &Tensor) -> Self {
329    SwishGrad {
330      lhs: lhs.clone(),
331      output: output.clone(),
332    }
333  }
334}
335
336impl GradientFunction for SwishGrad {
337  fn backward(&self) {
338    // Get output gradient
339    unimplemented!()
340  }
341
342  fn prev(&self) -> Vec<&Tensor> {
343    vec![&self.lhs]
344  }
345}