Skip to main content

entrenar/autograd/ops/
basic.rs

//! Basic autograd operations: add, mul, scale, add_scaled, sum
2
3use crate::autograd::{BackwardOp, Tensor};
4use ndarray::Array1;
5use std::cell::RefCell;
6use std::rc::Rc;
7
8/// Add two tensors
9pub fn add(a: &Tensor, b: &Tensor) -> Tensor {
10    let data = a.data() + b.data();
11    let requires_grad = a.requires_grad() || b.requires_grad();
12
13    let mut result = Tensor::new(data, requires_grad);
14
15    if requires_grad {
16        let a_clone = a.clone();
17        let b_clone = b.clone();
18        let backward_op =
19            Rc::new(AddBackward { a: a_clone, b: b_clone, result_grad: result.grad_cell() });
20        result.set_backward_op(backward_op);
21    }
22
23    result
24}
25
/// Backward node recorded by `add` when the output requires a gradient.
///
/// Holds what `backward` needs to hand the output gradient to both operands.
struct AddBackward {
    /// Clone of the first operand passed to `add`.
    a: Tensor,
    /// Clone of the second operand passed to `add`.
    b: Tensor,
    /// Gradient cell taken from the output tensor via `grad_cell()`;
    /// `backward` is a no-op while it holds `None`.
    result_grad: Rc<RefCell<Option<Array1<f32>>>>,
}
31
32impl BackwardOp for AddBackward {
33    fn backward(&self) {
34        if let Some(grad) = self.result_grad.borrow().as_ref() {
35            if self.a.requires_grad() {
36                self.a.accumulate_grad(grad.clone());
37            }
38            if self.b.requires_grad() {
39                self.b.accumulate_grad(grad.clone());
40            }
41
42            // Recursively call backward on inputs
43            if let Some(op) = self.a.backward_op() {
44                op.backward();
45            }
46            if let Some(op) = self.b.backward_op() {
47                op.backward();
48            }
49        }
50    }
51}
52
53/// Multiply two tensors element-wise
54pub fn mul(a: &Tensor, b: &Tensor) -> Tensor {
55    let data = a.data() * b.data();
56    let requires_grad = a.requires_grad() || b.requires_grad();
57
58    let mut result = Tensor::new(data, requires_grad);
59
60    if requires_grad {
61        let a_clone = a.clone();
62        let b_clone = b.clone();
63        let backward_op =
64            Rc::new(MulBackward { a: a_clone, b: b_clone, result_grad: result.grad_cell() });
65        result.set_backward_op(backward_op);
66    }
67
68    result
69}
70
/// Backward node recorded by `mul` when the output requires a gradient.
///
/// Both operands are kept because each one's gradient is the upstream
/// gradient multiplied by the other operand's data.
struct MulBackward {
    /// Clone of the first operand passed to `mul`.
    a: Tensor,
    /// Clone of the second operand passed to `mul`.
    b: Tensor,
    /// Gradient cell taken from the output tensor via `grad_cell()`;
    /// `backward` is a no-op while it holds `None`.
    result_grad: Rc<RefCell<Option<Array1<f32>>>>,
}
76
77impl BackwardOp for MulBackward {
78    fn backward(&self) {
79        if let Some(grad) = self.result_grad.borrow().as_ref() {
80            if self.a.requires_grad() {
81                // ∂L/∂a = ∂L/∂out * b
82                let grad_a = grad * self.b.data();
83                self.a.accumulate_grad(grad_a);
84            }
85            if self.b.requires_grad() {
86                // ∂L/∂b = ∂L/∂out * a
87                let grad_b = grad * self.a.data();
88                self.b.accumulate_grad(grad_b);
89            }
90
91            // Recursively call backward on inputs
92            if let Some(op) = self.a.backward_op() {
93                op.backward();
94            }
95            if let Some(op) = self.b.backward_op() {
96                op.backward();
97            }
98        }
99    }
100}
101
102/// Scale tensor by a scalar
103pub fn scale(a: &Tensor, factor: f32) -> Tensor {
104    let data = a.data() * factor;
105    let requires_grad = a.requires_grad();
106
107    let mut result = Tensor::new(data, requires_grad);
108
109    if requires_grad {
110        let a_clone = a.clone();
111        let backward_op =
112            Rc::new(ScaleBackward { a: a_clone, factor, result_grad: result.grad_cell() });
113        result.set_backward_op(backward_op);
114    }
115
116    result
117}
118
/// Backward node recorded by `scale` when the output requires a gradient.
struct ScaleBackward {
    /// Clone of the tensor passed to `scale`.
    a: Tensor,
    /// Scalar the forward pass multiplied by; the upstream gradient is
    /// multiplied by the same value in `backward`.
    factor: f32,
    /// Gradient cell taken from the output tensor via `grad_cell()`;
    /// `backward` is a no-op while it holds `None`.
    result_grad: Rc<RefCell<Option<Array1<f32>>>>,
}
124
125impl BackwardOp for ScaleBackward {
126    fn backward(&self) {
127        if let Some(grad) = self.result_grad.borrow().as_ref() {
128            if self.a.requires_grad() {
129                // ∂L/∂a = ∂L/∂out * factor
130                let grad_a = grad * self.factor;
131                self.a.accumulate_grad(grad_a);
132            }
133
134            if let Some(op) = self.a.backward_op() {
135                op.backward();
136            }
137        }
138    }
139}
140
141/// Add tensor b scaled by a factor to tensor a: result = a + scale * b
142///
143/// This is useful for LoRA: y = Wx + scale * (BA)x
144pub fn add_scaled(a: &Tensor, b: &Tensor, scale_factor: f32) -> Tensor {
145    assert_eq!(a.len(), b.len(), "Tensors must have same length");
146
147    // Compute a + scale * b
148    let a_data = a.data();
149    let b_data = b.data();
150    let result_data: Vec<f32> = a_data
151        .iter()
152        .zip(b_data.iter())
153        .map(|(&a_val, &b_val)| a_val + scale_factor * b_val)
154        .collect();
155
156    let requires_grad = a.requires_grad() || b.requires_grad();
157    let mut result = Tensor::new(Array1::from(result_data), requires_grad);
158
159    if requires_grad {
160        let a_clone = a.clone();
161        let b_clone = b.clone();
162        let backward_op = Rc::new(AddScaledBackward {
163            a: a_clone,
164            b: b_clone,
165            scale: scale_factor,
166            result_grad: result.grad_cell(),
167        });
168        result.set_backward_op(backward_op);
169    }
170
171    result
172}
173
/// Backward node recorded by `add_scaled` when the output requires a gradient.
struct AddScaledBackward {
    /// Clone of the unscaled operand passed to `add_scaled`.
    a: Tensor,
    /// Clone of the operand that the forward pass multiplied by `scale`.
    b: Tensor,
    /// The `scale_factor` given to `add_scaled`; applied to the gradient sent to `b`.
    scale: f32,
    /// Gradient cell taken from the output tensor via `grad_cell()`;
    /// `backward` is a no-op while it holds `None`.
    result_grad: Rc<RefCell<Option<Array1<f32>>>>,
}
180
181impl BackwardOp for AddScaledBackward {
182    fn backward(&self) {
183        if let Some(grad) = self.result_grad.borrow().as_ref() {
184            // ∂L/∂a = ∂L/∂result * 1 = grad
185            if self.a.requires_grad() {
186                self.a.accumulate_grad(grad.clone());
187            }
188
189            // ∂L/∂b = ∂L/∂result * scale = grad * scale
190            if self.b.requires_grad() {
191                let grad_b = grad * self.scale;
192                self.b.accumulate_grad(grad_b);
193            }
194
195            // Recursively call backward on inputs
196            if let Some(op) = self.a.backward_op() {
197                op.backward();
198            }
199            if let Some(op) = self.b.backward_op() {
200                op.backward();
201            }
202        }
203    }
204}
205
206/// Sum all elements
207pub fn sum(a: &Tensor) -> Tensor {
208    let data = Array1::from(vec![a.data().sum()]);
209    let requires_grad = a.requires_grad();
210
211    let mut result = Tensor::new(data, requires_grad);
212
213    if requires_grad {
214        let a_clone = a.clone();
215        let backward_op = Rc::new(SumBackward { a: a_clone, result_grad: result.grad_cell() });
216        result.set_backward_op(backward_op);
217    }
218
219    result
220}
221
/// Backward node recorded by `sum` when the output requires a gradient.
struct SumBackward {
    /// Clone of the tensor passed to `sum`; its length sets the size of the
    /// gradient built in `backward`.
    a: Tensor,
    /// Gradient cell taken from the output tensor via `grad_cell()`;
    /// `backward` reads element 0 and is a no-op while the cell holds `None`.
    result_grad: Rc<RefCell<Option<Array1<f32>>>>,
}
226
227impl BackwardOp for SumBackward {
228    fn backward(&self) {
229        if let Some(grad) = self.result_grad.borrow().as_ref() {
230            if self.a.requires_grad() {
231                // ∂L/∂a = ∂L/∂sum * 1 (broadcast)
232                let grad_val = grad[0];
233                let grad_a = Array1::from(vec![grad_val; self.a.len()]);
234                self.a.accumulate_grad(grad_a);
235            }
236
237            if let Some(op) = self.a.backward_op() {
238                op.backward();
239            }
240        }
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array1;

    /// Builds a tensor from a slice of values.
    fn tensor(values: &[f32], requires_grad: bool) -> Tensor {
        Tensor::new(Array1::from(values.to_vec()), requires_grad)
    }

    /// Copies the forward values of `t` into a `Vec` for easy comparison.
    fn values(t: &Tensor) -> Vec<f32> {
        t.data().as_slice().expect("operation should succeed").to_vec()
    }

    /// Copies the accumulated gradient of `t` into a `Vec`; panics if there is none.
    fn grad_of(t: &Tensor) -> Vec<f32> {
        let grad = t.grad().expect("gradient should be available");
        grad.as_slice().expect("operation should succeed").to_vec()
    }

    /// Simulates a gradient arriving from downstream and triggers the backward pass.
    fn backprop(output: &Tensor, seed: &[f32]) {
        output.accumulate_grad(Array1::from(seed.to_vec()));
        if let Some(op) = output.backward_op() {
            op.backward();
        }
    }

    #[test]
    fn test_add_forward() {
        let a = tensor(&[1.0, 2.0, 3.0], false);
        let b = tensor(&[4.0, 5.0, 6.0], false);
        let result = add(&a, &b);

        assert_eq!(values(&result), [5.0, 7.0, 9.0]);
        assert!(!result.requires_grad());
    }

    #[test]
    fn test_add_backward() {
        let a = tensor(&[1.0, 2.0, 3.0], true);
        let b = tensor(&[4.0, 5.0, 6.0], true);
        let result = add(&a, &b);

        assert!(result.requires_grad());

        backprop(&result, &[1.0, 1.0, 1.0]);

        // Both inputs should receive gradient of 1.0 (since d(a+b)/da = 1 and d(a+b)/db = 1)
        assert_eq!(grad_of(&a), [1.0, 1.0, 1.0]);
        assert_eq!(grad_of(&b), [1.0, 1.0, 1.0]);
    }

    #[test]
    fn test_add_partial_grad() {
        // Only a requires grad
        let a = tensor(&[1.0, 2.0], true);
        let b = tensor(&[3.0, 4.0], false);
        let result = add(&a, &b);

        backprop(&result, &[2.0, 3.0]);

        assert_eq!(grad_of(&a), [2.0, 3.0]);
        assert!(b.grad().is_none());
    }

    #[test]
    fn test_mul_forward() {
        let a = tensor(&[2.0, 3.0, 4.0], false);
        let b = tensor(&[5.0, 6.0, 7.0], false);
        let result = mul(&a, &b);

        assert_eq!(values(&result), [10.0, 18.0, 28.0]);
        assert!(!result.requires_grad());
    }

    #[test]
    fn test_mul_backward() {
        let a = tensor(&[2.0, 3.0], true);
        let b = tensor(&[4.0, 5.0], true);
        let result = mul(&a, &b);

        assert!(result.requires_grad());

        backprop(&result, &[1.0, 1.0]);

        // d(a*b)/da = b, d(a*b)/db = a
        assert_eq!(grad_of(&a), [4.0, 5.0]); // grad = b
        assert_eq!(grad_of(&b), [2.0, 3.0]); // grad = a
    }

    #[test]
    fn test_mul_partial_grad() {
        let a = tensor(&[2.0, 3.0], false);
        let b = tensor(&[4.0, 5.0], true);
        let result = mul(&a, &b);

        backprop(&result, &[1.0, 1.0]);

        assert!(a.grad().is_none());
        assert_eq!(grad_of(&b), [2.0, 3.0]);
    }

    #[test]
    fn test_scale_forward() {
        let a = tensor(&[1.0, 2.0, 3.0], false);
        let result = scale(&a, 2.5);

        assert_eq!(values(&result), [2.5, 5.0, 7.5]);
        assert!(!result.requires_grad());
    }

    #[test]
    fn test_scale_backward() {
        let a = tensor(&[1.0, 2.0, 3.0], true);
        let result = scale(&a, 3.0);

        assert!(result.requires_grad());

        backprop(&result, &[1.0, 1.0, 1.0]);

        // d(scale*a)/da = scale
        assert_eq!(grad_of(&a), [3.0, 3.0, 3.0]);
    }

    #[test]
    fn test_scale_no_grad() {
        let a = tensor(&[1.0, 2.0], false);
        let result = scale(&a, 5.0);

        assert!(!result.requires_grad());
        assert!(result.backward_op().is_none());
    }

    #[test]
    fn test_add_scaled_forward() {
        let a = tensor(&[1.0, 2.0, 3.0], false);
        let b = tensor(&[4.0, 5.0, 6.0], false);
        let result = add_scaled(&a, &b, 0.5);

        // result = a + 0.5 * b = [1+2, 2+2.5, 3+3] = [3, 4.5, 6]
        let expected = [3.0_f32, 4.5, 6.0];
        let actual = values(&result);

        // `zip` stops at the shorter side, so check the length explicitly;
        // otherwise a truncated result would pass the loop below.
        assert_eq!(actual.len(), expected.len());
        for (want, got) in expected.iter().zip(actual.iter()) {
            assert!((want - got).abs() < 1e-6);
        }
    }

    #[test]
    fn test_add_scaled_backward() {
        let a = tensor(&[1.0, 2.0], true);
        let b = tensor(&[3.0, 4.0], true);
        let result = add_scaled(&a, &b, 2.0);

        backprop(&result, &[1.0, 1.0]);

        // d(a + scale*b)/da = 1, d(a + scale*b)/db = scale
        assert_eq!(grad_of(&a), [1.0, 1.0]);
        assert_eq!(grad_of(&b), [2.0, 2.0]); // scale = 2.0
    }

    #[test]
    fn test_add_scaled_partial_grad() {
        let a = tensor(&[1.0, 2.0], true);
        let b = tensor(&[3.0, 4.0], false);
        let result = add_scaled(&a, &b, 0.5);

        backprop(&result, &[2.0, 3.0]);

        assert_eq!(grad_of(&a), [2.0, 3.0]);
        assert!(b.grad().is_none());
    }

    #[test]
    #[should_panic(expected = "Tensors must have same length")]
    fn test_add_scaled_length_mismatch() {
        let a = tensor(&[1.0, 2.0], false);
        let b = tensor(&[3.0, 4.0, 5.0], false);
        let _ = add_scaled(&a, &b, 1.0);
    }

    #[test]
    fn test_sum_forward() {
        let a = tensor(&[1.0, 2.0, 3.0, 4.0], false);
        let result = sum(&a);

        assert_eq!(values(&result), [10.0]);
        assert!(!result.requires_grad());
    }

    #[test]
    fn test_sum_backward() {
        let a = tensor(&[1.0, 2.0, 3.0], true);
        let result = sum(&a);

        assert!(result.requires_grad());

        backprop(&result, &[2.0]);

        // d(sum(a))/da = [1, 1, 1], scaled by incoming grad
        assert_eq!(grad_of(&a), [2.0, 2.0, 2.0]);
    }

    #[test]
    fn test_sum_no_grad() {
        let a = tensor(&[1.0, 2.0, 3.0], false);
        let result = sum(&a);

        assert!(!result.requires_grad());
        assert!(result.backward_op().is_none());
    }

    #[test]
    fn test_chained_ops_backward() {
        // Test: (a + b) * c, then sum
        let a = tensor(&[1.0, 2.0], true);
        let b = tensor(&[3.0, 4.0], true);
        let c = tensor(&[2.0, 3.0], true);

        let ab = add(&a, &b); // [4, 6]
        let abc = mul(&ab, &c); // [8, 18]
        let result = sum(&abc); // 26

        assert_eq!(result.data()[0], 26.0);

        backprop(&result, &[1.0]);

        // d(sum)/d(abc) = [1, 1]
        // d(abc)/d(ab) = c = [2, 3]
        // d(abc)/dc = ab = [4, 6]
        // d(ab)/da = [1, 1], d(ab)/db = [1, 1]
        // So: d/da = [2, 3], d/db = [2, 3], d/dc = [4, 6]
        assert_eq!(grad_of(&a), [2.0, 3.0]);
        assert_eq!(grad_of(&b), [2.0, 3.0]);
        assert_eq!(grad_of(&c), [4.0, 6.0]);
    }

    #[test]
    fn test_scale_chained_backward() {
        let a = tensor(&[1.0, 2.0], true);
        let scaled = scale(&a, 3.0);
        let result = sum(&scaled);

        backprop(&result, &[1.0]);

        // d(sum(3*a))/da = 3 * [1, 1] = [3, 3]
        assert_eq!(grad_of(&a), [3.0, 3.0]);
    }
}