use std::time::Instant;

use gradients::{
    correct_classes,
    nn::{cce, cce_grad},
    range, Conv2D, Linear, Matrix, NeuralNetwork, OneHotMat, ReLU, Softmax,
};
use purpur::{CSVLoader, Converter};
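
// `#[derive(NeuralNetwork)]` is assumed to generate the `forward`, `backward`
// and `params` implementations used below, chaining through the layer fields
// in declaration order.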
#[derive(NeuralNetwork)]
pub struct Network<'a, T> {
    conv: Conv2D<'a, T>,
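    // A 28x28 input with a 3x3 kernel and no padding yields 26x26 per feature
    // map; 5 feature maps flatten to 5 * 26 * 26 inputs for `lin1`.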
    lin1: Linear<'a, T, { 5 * 26 * 26 }, 128>,
    relu1: ReLU<'a, T>,
    lin2: Linear<'a, T, 128, 10>,
    relu2: ReLU<'a, T>,
    lin3: Linear<'a, T, 10, 10>,
    softmax: Softmax<'a, T>,
}

#[test]
fn test_conv_net() -> custos::Result<()> {
    let device = custos::CPU::new();

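    // Load the digit-recognizer training CSV; `CSVLoader::new(true)` is
    // assumed to mean the file starts with a header row that is skipped.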
    let loader = CSVLoader::new(true);
    let loaded_data =
        loader.load("../../gradients-fallback/datasets/digit-recognizer/train.csv")?;
    //let loaded_data = loader.load("../../../datasets/mnist/mnist_train.csv").unwrap();

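    // Wrap the flat pixel data in a (sample_count x features) matrix and
    // scale the 0..=255 grayscale values down to the 0..=1 range.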
    let i = Matrix::<f32>::from((
        &device,
        (loaded_data.sample_count, loaded_data.features),
        &loaded_data.x,
    ));
    let i = i / 255.;

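    // One-hot encode the class labels (digits 0..=9) for the categorical
    // cross-entropy loss.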
    let y = Matrix::from((&device, (loaded_data.sample_count, 1), &loaded_data.y));
    let y = y.onehot();

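    // Conv2D::new takes the device, the input dimensions, the kernel
    // dimensions and the number of feature maps; the activation and softmax
    // layers are filled in via `..Default::default()`.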
    let mut net: Network<f32> = Network {
        conv: Conv2D::new(&device, (28, 28), (3, 3), 5),
        lin1: Linear::new(&device),
        lin2: Linear::new(&device),
        lin3: Linear::new(&device),
        ..Default::default()
    };

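    // Adam with a learning rate of 0.001; a momentum-based SGD variant is
    // kept below as an alternative.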
    let mut opt = gradients::Adam::<f32>::new(0.001);
    //let mut opt = gradients::SGD::new(0.1).momentum(0.8);

    let start = Instant::now();

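    // An earlier per-image (stochastic) training variant is kept in the
    // block comment below for reference.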
    /*
    let mut img = 0;
    for epoch in range(100000) {
        if img >= loaded_data.sample_count {
            img = 0;
        }

        let drop = CPU::new();

        let start = img * 28 * 28;
        let single_input = Matrix::from((&drop, 1, 28 * 28, &i[start..start + 28 * 28]));

        let start = img * 10;
        let single_y = Matrix::from((&drop, 1, 10, &y[start..start + 10]));

        let preds = net.forward(single_input);
        //let correct_training = correct_classes(&loaded_data.y.as_usize(), preds) as f32;

        let loss = cce(&device, &preds, &single_y);
        if epoch % 100 == 0 {
            println!("epoch: {epoch}, loss: {loss}");
        }
        /*println!(
            "epoch: {epoch}, loss: {loss}, training_acc: {acc}",
            acc = correct_training / loaded_data.sample_count() as f32
        );*/

        img += 1;

        let grad = cce_grad(&device, &preds, &single_y);
        net.backward(grad);
        opt.step(&device, net.params());
    }
    */

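    // Full-batch training loop: forward pass over all samples, report loss
    // and training accuracy, then backpropagate the cross-entropy gradient
    // and let the optimizer update the parameters.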
    for epoch in range(100000) {
        let preds = net.forward(&i);
        let correct_training = correct_classes(&loaded_data.y.as_usize(), &preds) as f32;

        let loss = cce(&device, &preds, &y);
        println!(
            "epoch: {epoch}, loss: {loss}, training_acc: {acc}",
            acc = correct_training / loaded_data.sample_count() as f32
        );

        let grad = cce_grad(&device, &preds, &y);
        net.backward(&grad);
        opt.step(&device, net.params());
    }

println!("training duration: {:?}", start.elapsed());
Ok(())
}
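
// To run just this test with its log output visible (assuming the usual
// Cargo test setup):
//
//     cargo test test_conv_net --release -- --nocapture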