1use rayon::prelude::*;
2
3use slop_algebra::Field;
4use slop_alloc::{buffer, Buffer, CpuBackend};
5
6use crate::{Dimensions, Tensor};
7
8pub fn sum_tensor_dim<T: Field>(src: &Tensor<T, CpuBackend>, dim: usize) -> Tensor<T, CpuBackend> {
10 let mut sizes = src.sizes().to_vec();
11 sizes.remove(dim);
12 let dimensions = Dimensions::try_from(sizes).unwrap();
13 let mut dst = Tensor { storage: buffer![], dimensions };
14 assert_eq!(dim, 0, "Only sum along the first dimension is supported");
15 let total_len = dst.total_len();
16 let dim_stride = src.strides()[dim];
17
18 let sums = src
19 .as_buffer()
20 .par_chunks_exact(dim_stride)
21 .fold(
22 || vec![T::zero(); total_len],
23 |mut acc, item| {
24 acc.iter_mut().zip(item).for_each(|(a, b)| *a += *b);
25 acc
26 },
27 )
28 .reduce(
29 || vec![T::zero(); total_len],
30 |mut a, b| {
31 a.iter_mut().zip(b.iter()).for_each(|(a, b)| *a += *b);
32 a
33 },
34 );
35
36 let sums = Buffer::from(sums);
37 dst.storage = sums;
38 dst
39}
40
41impl<T: Field> Tensor<T, CpuBackend> {
42 pub fn sum(&self, dim: usize) -> Tensor<T, CpuBackend> {
44 sum_tensor_dim(self, dim)
45 }
46}
47
#[cfg(test)]
mod tests {
    use slop_algebra::AbstractField;
    use slop_baby_bear::BabyBear;

    use super::*;

    /// Compares `Tensor::sum(0)` with a naive index-based accumulation over the first
    /// dimension, for one random rank-2 tensor and one random rank-3 tensor.
    #[test]
    fn test_sum() {
        let mut rng = rand::thread_rng();

        // Rank-2 case: reducing a 3x4 tensor along dimension 0 leaves 4 entries.
        let shape = [3, 4];
        let input = Tensor::<BabyBear>::rand(&mut rng, shape);
        let reduced = input.sum(0);
        for col in 0..shape[1] {
            let expected =
                (0..shape[0]).fold(BabyBear::zero(), |acc, row| acc + *input[[row, col]]);
            assert_eq!(expected, *reduced[[col]]);
        }

        // Rank-3 case: reducing a 3x4x5 tensor along dimension 0 leaves a 4x5 tensor.
        let shape = [3, 4, 5];
        let input = Tensor::<BabyBear>::rand(&mut rng, shape);
        let reduced = input.sum(0);
        for mid in 0..shape[1] {
            for inner in 0..shape[2] {
                let expected = (0..shape[0])
                    .fold(BabyBear::zero(), |acc, outer| acc + *input[[outer, mid, inner]]);
                assert_eq!(expected, *reduced[[mid, inner]]);
            }
        }
    }
}