use alloc::vec::Vec;
use crate::{element::FloatNdArrayElement, tensor::NdArrayTensor, NdArrayBackend, NdArrayDevice};
use burn_tensor::{ops::*, Shape};
use super::{
    conv::conv2d,
    maxpool::{max_pool2d, max_pool2d_backward, max_pool2d_with_indexes},
};

impl<E: FloatNdArrayElement> ModuleOps<NdArrayBackend<E>> for NdArrayBackend<E> {
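    /// Looks up one row of `weights` per index: the indexes are flattened,
    /// each selected `d_model`-wide row is gathered, and the result is
    /// reshaped to `[batch_size, seq_length, d_model]`.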
    fn embedding(
        weights: NdArrayTensor<E, 2>,
        indexes: NdArrayTensor<i64, 2>,
    ) -> NdArrayTensor<E, 3> {
        let [batch_size, seq_length] = indexes.shape().dims;
        let [_n_embedding, d_model] = weights.shape().dims;

        let mut tensors = Vec::with_capacity(batch_size * seq_length);

        // Flatten the indexes, then gather the matching weight row for each one.
        for index in
            NdArrayBackend::<E>::int_reshape(indexes, Shape::new([batch_size * seq_length]))
                .array
                .iter()
        {
            let index = *index as usize;
            tensors.push(NdArrayBackend::index(
                weights.clone(),
                [index..index + 1, 0..d_model],
            ));
        }

        // Stack the gathered rows and restore the batched shape.
        let embedding = NdArrayBackend::cat(tensors, 0);
        NdArrayBackend::reshape(embedding, Shape::new([batch_size, seq_length, d_model]))
    }
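
    /// Gradient of `embedding` with respect to `weights`: each row of the
    /// output gradient is accumulated back into the weight row it was
    /// gathered from, so repeated indexes sum their contributions.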
    fn embedding_backward(
        weights: NdArrayTensor<E, 2>,
        output: NdArrayTensor<E, 3>,
        indexes: NdArrayTensor<i64, 2>,
    ) -> NdArrayTensor<E, 2> {
        let [batch_size, seq_length] = indexes.shape().dims;
        let [_n_embedding, d_model] = weights.shape().dims;

        let mut weights_grad = NdArrayBackend::zeros(weights.shape(), &NdArrayDevice::Cpu);
        let output =
            NdArrayBackend::reshape(output, Shape::new([batch_size * seq_length, d_model]));

        for (index_output, index) in
            NdArrayBackend::<E>::int_reshape(indexes, Shape::new([batch_size * seq_length]))
                .array
                .iter()
                .enumerate()
        {
            let index = *index as usize;

            // Read the gradient accumulated so far for this weight row, since
            // the same row may be selected by several indexes.
            let weights_grad_current =
                NdArrayBackend::index(weights_grad.clone(), [index..index + 1, 0..d_model]);
            let output_grad = NdArrayBackend::index(
                output.clone(),
                [index_output..index_output + 1, 0..d_model],
            );

            weights_grad = NdArrayBackend::index_assign(
                weights_grad,
                [index..index + 1, 0..d_model],
                NdArrayBackend::add(output_grad, weights_grad_current),
            );
        }

        weights_grad
    }
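
    /// 2D convolution, delegated to the shared `conv2d` kernel; the trailing
    /// `[1, 1]` argument (presumably the dilation) is fixed here.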
    fn conv2d(
        x: NdArrayTensor<E, 4>,
        weight: NdArrayTensor<E, 4>,
        bias: Option<NdArrayTensor<E, 1>>,
        stride: [usize; 2],
        padding: [usize; 2],
    ) -> NdArrayTensor<E, 4> {
        conv2d(x, weight, bias, stride, padding, [1, 1])
    }
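
    /// 2D max pooling, delegated to the shared `max_pool2d` kernel.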
    fn max_pool2d(
        x: NdArrayTensor<E, 4>,
        kernel_size: [usize; 2],
        stride: [usize; 2],
        padding: [usize; 2],
    ) -> NdArrayTensor<E, 4> {
        max_pool2d(x, kernel_size, stride, padding)
    }
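
    /// Max pooling that also returns the index of each selected maximum,
    /// as needed by the backward pass.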
    fn max_pool2d_with_indexes(
        x: NdArrayTensor<E, 4>,
        kernel_size: [usize; 2],
        stride: [usize; 2],
        padding: [usize; 2],
    ) -> MaxPool2dWithIndexes<NdArrayBackend<E>> {
        let (output, indexes) = max_pool2d_with_indexes(x, kernel_size, stride, padding);

        MaxPool2dWithIndexes::new(output, indexes)
    }
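
    /// Backward pass of max pooling: routes each output gradient back to
    /// the input position recorded in `indexes`.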
    fn max_pool2d_with_indexes_backward(
        x: NdArrayTensor<E, 4>,
        kernel_size: [usize; 2],
        stride: [usize; 2],
        padding: [usize; 2],
        output_grad: NdArrayTensor<E, 4>,
        indexes: NdArrayTensor<i64, 4>,
    ) -> MaxPool2dBackward<NdArrayBackend<E>> {
        MaxPool2dBackward::new(max_pool2d_backward(
            x,
            kernel_size,
            stride,
            padding,
            output_grad,
            indexes,
        ))
    }
}