use opencl3::{
device::cl_float,
memory::Buffer
};
use intricate_macros::ActivationLayer;
use savefile_derive::Savefile;
use crate::utils::OpenCLState;
const PROGRAM_NAME: &str = "SIGMOID";
const PROGRAM_SOURCE: &str = include_str!("kernels/sigmoid.cl");
const PROPAGATE_KERNEL_NAME: &str = "propagate";
const BACK_PROPAGATE_KERNEL_NAME: &str = "back_propagate";
#[derive(Debug, Savefile, ActivationLayer)]
pub struct Sigmoid<'a> {
pub inputs_amount: usize,
#[savefile_ignore]
#[savefile_introspect_ignore]
pub last_inputs_buffer: Option<Buffer<cl_float>>,
#[savefile_ignore]
#[savefile_introspect_ignore]
pub last_outputs_buffer: Option<Buffer<cl_float>>,
#[savefile_ignore]
#[savefile_introspect_ignore]
opencl_state: Option<&'a OpenCLState>
}
#[cfg(test)]
mod sigmoid_tests {
use std::{f32::consts::E, ptr};
use opencl3::{
command_queue::{CL_BLOCKING, CL_NON_BLOCKING},
device::cl_float,
memory::{Buffer, CL_MEM_READ_ONLY}
};
use rand::{thread_rng, Rng};
use crate::{
layers::Layer,
utils::{approx_eq::assert_approx_equal_distance, setup_opencl, opencl::DeviceType},
};
use super::Sigmoid;
#[test]
fn should_propagate_to_correct_values() {
let state = setup_opencl(DeviceType::GPU).unwrap();
let context = &state.context;
let queue = state.queues.first().unwrap();
let samples_amount = 423;
let numbers_amount = 141;
let mut sigmoid = Sigmoid::new(numbers_amount);
sigmoid.init(&state).unwrap();
let mut rng = thread_rng();
let input_samples: Vec<f32> = (0..(samples_amount * numbers_amount))
.into_iter()
.map(|_| rng.gen_range(-1.0_f32..1.0_f32))
.collect();
let expected_outputs: Vec<f32> =
input_samples.iter().map(|x| 1.0 / (1.0 + E.powf(-x))).collect();
let mut input_samples_buffer = Buffer::<cl_float>::create(
&context,
CL_MEM_READ_ONLY,
numbers_amount * samples_amount,
ptr::null_mut(),
).unwrap();
queue
.enqueue_write_buffer(
&mut input_samples_buffer,
CL_BLOCKING,
0,
input_samples.as_slice(),
&[],
).unwrap()
.wait().unwrap();
let actual_outputs_buffer = sigmoid.propagate(&input_samples_buffer).unwrap();
let mut actual_outputs = vec![0.0; numbers_amount * samples_amount];
let actual_outputs_slice = actual_outputs.as_mut_slice();
queue
.enqueue_read_buffer(
&actual_outputs_buffer,
CL_BLOCKING,
0,
actual_outputs_slice,
&[],
).unwrap()
.wait().unwrap();
assert_approx_equal_distance(&expected_outputs, &actual_outputs, 0.01);
}
#[test]
fn should_back_propagate_returning_the_correct_derivatives(
) {
let state = setup_opencl(DeviceType::GPU).unwrap();
let context = &state.context;
let queue = state.queues.first().unwrap();
let samples_amount = 432;
let numbers_amount = 331;
let mut tanh = Sigmoid::new(numbers_amount);
tanh.init(&state).unwrap();
let mut rng = thread_rng();
let input_samples: Vec<f32> = (0..(samples_amount * numbers_amount))
.into_iter()
.map(|_| rng.gen_range(-1.0_f32..1.0_f32))
.collect();
let first_derivatives: Vec<f32> = (0..(samples_amount * numbers_amount))
.into_iter()
.map(|_| rng.gen_range(-1.0_f32..1.0_f32))
.collect();
let mut input_samples_buffer = Buffer::<cl_float>::create(
&context,
CL_MEM_READ_ONLY,
numbers_amount * samples_amount,
ptr::null_mut(),
).unwrap();
let mut first_derivatives_buffer = Buffer::<cl_float>::create(
&context,
CL_MEM_READ_ONLY,
numbers_amount * samples_amount,
ptr::null_mut(),
).unwrap();
queue
.enqueue_write_buffer(
&mut first_derivatives_buffer,
CL_BLOCKING,
0,
first_derivatives.as_slice(),
&[],
).unwrap()
.wait().unwrap();
queue
.enqueue_write_buffer(
&mut input_samples_buffer,
CL_BLOCKING,
0,
input_samples.as_slice(),
&[],
).unwrap()
.wait().unwrap();
tanh.propagate(&input_samples_buffer).unwrap();
let expected_loss_to_input_derivatives: Vec<Vec<f32>> = (0..samples_amount)
.into_iter()
.map(|i| {
(0..numbers_amount) .into_iter()
.map(|j| {
let input = input_samples[i * numbers_amount + j];
let sigmoid = 1.0 / (1.0 + E.powf(-input));
sigmoid
* (1.0 - sigmoid)
* (0..numbers_amount) .into_iter()
.map(|k| first_derivatives[i * numbers_amount + k])
.sum::<f32>()
})
.collect::<Vec<f32>>()
})
.collect();
let actual_loss_to_input_derivatives_buffer = tanh
.compute_loss_to_input_derivatives(&first_derivatives_buffer).unwrap();
let mut actual_loss_to_input_derivatives = vec![0.0; numbers_amount * samples_amount];
let actual_loss_to_input_derivatives_slice =
actual_loss_to_input_derivatives.as_mut_slice();
queue
.enqueue_read_buffer(
&actual_loss_to_input_derivatives_buffer,
CL_NON_BLOCKING,
0,
actual_loss_to_input_derivatives_slice,
&[],
).unwrap()
.wait().unwrap();
println!("derivatives CPU: {:?}", &expected_loss_to_input_derivatives,);
println!("\nderivatives GPU: {:?}", &actual_loss_to_input_derivatives);
assert_approx_equal_distance(
&actual_loss_to_input_derivatives,
&expected_loss_to_input_derivatives
.iter()
.map(|v| v.to_vec())
.flatten()
.collect(),
0.01,
);
}
}