#![deny(missing_docs)]
#![cfg_attr(feature = "cargo-clippy", allow(unreadable_literal, redundant_field_names))]
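//! A reverse-mode automatic differentiation library built on `ndarray`.
//!
//! Computation graphs are assembled by applying operators to `Variable`
//! handles; calling `forward` evaluates the graph, and `backward`
//! accumulates gradients into its parameters.
//!
//! The example below is a minimal sketch of fitting a univariate linear
//! model, mirroring the `univariate_regression` test at the bottom of this
//! file. The crate name in the `use` lines is an assumption, so the block
//! is marked `ignore` rather than run as a doctest.
//!
//! ```ignore
//! use wyrm::{DataInput, InputNode, ParameterNode};
//! use wyrm::optim::{Adagrad, Optimizer};
//!
//! let slope = ParameterNode::new(wyrm::nn::xavier_normal(1, 1));
//! let intercept = ParameterNode::new(wyrm::nn::xavier_normal(1, 1));
//!
//! let x = InputNode::new(ndarray::arr2(&[[3.0]]));
//! let y = InputNode::new(ndarray::arr2(&[[0.5 * 3.0 + 0.2]]));
//!
//! // Build the graph: y_hat = slope * x + intercept, loss = (y - y_hat)^2.
//! let y_hat = slope.clone() * x.clone() + intercept.clone();
//! let mut loss = (y.clone() - y_hat).square();
//!
//! let optimizer = Adagrad::new().learning_rate(0.1);
//! for _ in 0..100 {
//!     loss.forward();
//!     loss.backward(1.0);
//!     optimizer.step(loss.parameters());
//! }
//! ```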
#[macro_use]
extern crate serde_derive;
extern crate serde;
#[allow(unused_imports)]
#[macro_use]
extern crate ndarray;
extern crate rand;
extern crate rayon;
extern crate smallvec;
#[macro_use]
extern crate itertools;
extern crate hibitset;
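/// Alias for the two-dimensional `f32` `ndarray` array type used for all
/// values and gradients in this crate.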
pub type Arr = ndarray::Array2<f32>;
use std::cell::RefCell;
use std::clone::Clone;
use std::ops::{Add, Deref, Div, Mul, Neg, Sub};
use std::rc::Rc;
use itertools::Itertools;
mod fast_approx;
pub mod nn;
mod nodes;
mod numerics;
pub mod optim;
use nodes::*;
pub use nodes::{Bor, HogwildParameter, IndexInputNode, InputNode, Node, ParameterNode};
pub use numerics::simd_dot;
fn clamp(x: f32, min: f32, max: f32) -> f32 {
if x > max {
max
} else if x < min {
min
} else {
x
}
}
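/// Trait for objects whose values can be set from host data.
///
/// A minimal sketch of setting an input's value (marked `ignore`; assumes
/// the trait is in scope):
///
/// ```ignore
/// let x = InputNode::new(Arr::zeros((1, 1)));
/// x.set_value(3.0);
/// ```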
pub trait DataInput<T> {
    /// Set the value of the object.
    fn set_value(&self, value: T);
}
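/// Merge the parameter lists of two subgraphs, deduplicating parameters
/// that appear in both. Relies on both inputs being sorted by node pointer,
/// which is the invariant this function's own output maintains.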
fn merge_parameters(
xs: &[Variable<ParameterNode>],
ys: &[Variable<ParameterNode>],
) -> Vec<Variable<ParameterNode>> {
xs.iter()
.merge_join_by(ys.iter(), |x, y| x.as_ptr().cmp(&y.as_ptr()))
.map(|either| match either {
itertools::EitherOrBoth::Left(x) => x,
itertools::EitherOrBoth::Right(x) => x,
itertools::EitherOrBoth::Both(x, _) => x,
})
.cloned()
.collect()
}
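/// Handle to a node in the computation graph. The underlying nodes are
/// reference-counted, so handles can be cloned freely to reuse a node in
/// several places in the same graph.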
#[derive(Debug)]
pub struct Variable<T>
where
T: Node,
{
node: Rc<T>,
grad: Option<RefCell<Arr>>,
parameters: Vec<Variable<ParameterNode>>,
}
impl<T: Node> Clone for Variable<T> {
fn clone(&self) -> Self {
Variable {
node: Rc::clone(&self.node),
grad: None,
parameters: self.parameters.clone(),
}
}
}
impl<T> Variable<T>
where
T: Node,
{
fn new(node: Rc<T>, parameters: Vec<Variable<ParameterNode>>) -> Self {
        Variable {
            node,
            grad: None,
            parameters,
        }
}
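    /// Get the value of the node.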
pub fn value(&self) -> Bor<T::Value> {
self.node.value()
}
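    /// Run the forward pass through the subgraph terminating at this node,
    /// recursing through its ancestors.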
pub fn forward(&self) {
self.node.forward()
}
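    /// Clear the graph caches. Must be called whenever inputs change and
    /// `backward` is not called.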
pub fn clear(&self) {
self.node.clear();
}
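    /// Zero the accumulated gradients of all of this graph's parameters.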
pub fn zero_gradient(&self) {
for param in self.parameters() {
param.node.zero_gradient();
}
}
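    /// Return the parameters of the graph.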
pub fn parameters(&self) -> &[Variable<ParameterNode>] {
&self.parameters[..]
}
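    /// Return the parameters of the graph, mutably.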
pub fn parameters_mut(&mut self) -> &mut [Variable<ParameterNode>] {
&mut self.parameters[..]
}
}
pub type BoxedNode = Rc<Node<Value = Arr, InputGradient = Arr>>;
impl<T> Variable<T>
where
T: Node<Value = Arr, InputGradient = Arr>,
{
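    /// Box the variable, erasing its concrete node type. Useful for
    /// managing the complexity of variable types in deep computation graphs.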
pub fn boxed(&self) -> Variable<Rc<Node<Value = Arr, InputGradient = Arr>>> {
Variable::new(
Rc::new(self.node.clone() as Rc<Node<Value = Arr, InputGradient = Arr>>),
self.parameters.clone(),
)
}
    /// Run the backward pass through the subgraph terminating at this node,
    /// seeding the gradient at this node with `weight`.
    pub fn backward(&mut self, weight: f32) {
        let val = self.node.value();
        // Allocate the gradient buffer on first use, then fill it with the
        // seed weight on every call so the buffer can be reused.
        self.grad
            .get_or_insert_with(|| RefCell::new(val.map(|_| weight)))
            .borrow_mut()
            .as_slice_mut()
            .unwrap()
            .iter_mut()
            .for_each(|x| *x = weight);
        if let Some(ref grad) = self.grad {
            self.node.backward(&grad.borrow());
        }
    }
    /// Clip this variable's value to lie in `[min, max]`, in place.
    pub fn clip(&self, min: f32, max: f32) {
        let bor_value = self.node.value();
        let value: &Arr = bor_value.deref();
        // The value buffer is behind a shared borrow; cast it to a mutable
        // reference so it can be clamped in place.
        let value = unsafe { &mut *(value as *const Arr as *mut Arr) };
        value
            .as_slice_mut()
            .unwrap()
            .iter_mut()
            .for_each(|x| *x = clamp(*x, min, max));
    }
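    /// Square this variable.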
pub fn square(&self) -> Variable<SquareNode<T>> {
Variable::new(
Rc::new(SquareNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
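    /// Sum the elements of this variable into a scalar node.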
pub fn scalar_sum(&self) -> Variable<SumNode<T>> {
Variable::new(
Rc::new(SumNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
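    /// Take the natural logarithm of this variable.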
pub fn ln(&self) -> Variable<LogNode<T>> {
Variable::new(
Rc::new(LogNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
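    /// Take the hyperbolic tangent of this variable.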
pub fn tanh(&self) -> Variable<TanhNode<T>> {
Variable::new(
Rc::new(TanhNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
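    /// Transpose this variable.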
pub fn t(&self) -> Variable<TransposeNode<T>> {
Variable::new(
Rc::new(TransposeNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
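    /// Exponentiate this variable.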
pub fn exp(&self) -> Variable<ExpNode<T>> {
Variable::new(
Rc::new(ExpNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
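    /// Compute the softmax of this variable.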
pub fn softmax(&self) -> Variable<SoftmaxNode<T>> {
Variable::new(
Rc::new(SoftmaxNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
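    /// Compute the log-softmax of this variable.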
pub fn log_softmax(&self) -> Variable<LogSoftmaxNode<T>> {
Variable::new(
Rc::new(LogSoftmaxNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
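    /// Compute the sigmoid of this variable.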
pub fn sigmoid(&self) -> Variable<SigmoidNode<T>> {
Variable::new(
Rc::new(SigmoidNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
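    /// Compute the ReLU of this variable.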
pub fn relu(&self) -> Variable<ReluNode<T>> {
Variable::new(
Rc::new(ReluNode::new(Rc::clone(&self.node))),
self.parameters.clone(),
)
}
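    /// Compute the row-wise vector dot product of this variable and `other`.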
pub fn vector_dot<S>(&self, other: &Variable<S>) -> Variable<VectorDotNode<T, S>>
where
S: Node<Value = Arr, InputGradient = Arr>,
{
Variable::new(
Rc::new(VectorDotNode::new(
Rc::clone(&self.node),
Rc::clone(&other.node),
)),
merge_parameters(&self.parameters, &other.parameters),
)
}
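    /// Compute the matrix product of this variable and `other`.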
pub fn dot<S>(&self, other: &Variable<S>) -> Variable<DotNode<T, S>>
where
S: Node<Value = Arr, InputGradient = Arr>,
{
Variable::new(
Rc::new(DotNode::new(Rc::clone(&self.node), Rc::clone(&other.node))),
merge_parameters(&self.parameters, &other.parameters),
)
}
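    /// Stack (concatenate) this variable with `other` along `axis`.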
pub fn stack<S>(
&self,
other: &Variable<S>,
axis: ndarray::Axis,
) -> Variable<ConcatenateNode<T, S>>
where
S: Node<Value = Arr, InputGradient = Arr>,
{
Variable::new(
Rc::new(ConcatenateNode::new(
Rc::clone(&self.node),
Rc::clone(&other.node),
axis,
)),
merge_parameters(&self.parameters, &other.parameters),
)
}
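    /// Slice this variable with an `ndarray` slice specification (see the
    /// `s![]` macro).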
pub fn slice(
&self,
slice: &ndarray::SliceInfo<[ndarray::SliceOrIndex; 2], ndarray::Ix2>,
) -> Variable<SliceNode<T>> {
Variable::new(
Rc::new(SliceNode::new(Rc::clone(&self.node), slice)),
self.parameters.clone(),
)
}
}
impl Variable<ParameterNode> {
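    /// Return a dense copy of this parameter's accumulated gradient.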
pub fn gradient(&self) -> Arr {
self.node.gradient.borrow().materialized_gradient()
}
fn as_ptr(&self) -> *const ParameterNode {
self.node.deref() as *const ParameterNode
}
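    /// Return the rows of this parameter selected by `index`.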
pub fn index(&self, index: &Variable<IndexInputNode>) -> Variable<IndexNode<ParameterNode>> {
Variable::new(
Rc::new(IndexNode::new(
Rc::clone(&self.node),
Rc::clone(&index.node),
)),
merge_parameters(&self.parameters, &index.parameters),
)
}
}
impl<T> Variable<nn::losses::SparseCategoricalCrossentropyNode<T>>
where
T: Node<Value = Arr, InputGradient = Arr>,
{
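    /// Return the predictions computed by this loss node.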
pub fn predictions(&self) -> Bor<Arr> {
self.node.predictions()
}
}
impl<'value> DataInput<&'value Arr> for Variable<ParameterNode> {
fn set_value(&self, value: &Arr) {
let param_value = unsafe { &mut *(self.node.value.deref().value.as_ptr()) };
param_value.assign(value)
}
}
impl<'value> DataInput<&'value Arr> for Variable<InputNode> {
fn set_value(&self, value: &Arr) {
self.node.value.borrow_mut().assign(value);
}
}
impl DataInput<f32> for Variable<InputNode> {
fn set_value(&self, value: f32) {
self.node.value.borrow_mut()[(0, 0)] = value;
}
}
impl<'value> DataInput<&'value [usize]> for Variable<IndexInputNode> {
fn set_value(&self, value: &[usize]) {
let mut node_value = self.node.value.borrow_mut();
node_value.clear();
node_value.extend_from_slice(value);
}
}
impl DataInput<usize> for Variable<IndexInputNode> {
fn set_value(&self, value: usize) {
let mut node_value = self.node.value.borrow_mut();
node_value.clear();
node_value.push(value);
}
}
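// Implement an arithmetic operator for variable-variable, variable-scalar,
// and scalar-variable operand combinations. Scalars are broadcast by
// materializing a constant input node with the shape of the other operand.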
macro_rules! impl_arithmetic_op {
($trait:ident, $fn:ident, $node:ident) => {
impl<LHS, RHS> $trait<Variable<RHS>> for Variable<LHS>
where
RHS: Node<Value = Arr, InputGradient = Arr>,
LHS: Node<Value = Arr, InputGradient = Arr>,
{
type Output = Variable<$node<LHS, RHS>>;
fn $fn(self, other: Variable<RHS>) -> Self::Output {
Variable::new(
Rc::new($node::new(self.node, other.node)),
merge_parameters(&self.parameters, &other.parameters),
)
}
}
impl<LHS> $trait<f32> for Variable<LHS>
where
LHS: Node<Value = Arr, InputGradient = Arr>,
{
type Output = Variable<$node<LHS, InputNode>>;
fn $fn(self, other: f32) -> Self::Output {
let constant = InputNode::new(self.value().deref() * 0.0 + other);
Variable::new(
Rc::new($node::new(self.node, constant.node)),
merge_parameters(&self.parameters, &constant.parameters),
)
}
}
impl<RHS> $trait<Variable<RHS>> for f32
where
RHS: Node<Value = Arr, InputGradient = Arr>,
{
type Output = Variable<$node<InputNode, RHS>>;
fn $fn(self, other: Variable<RHS>) -> Self::Output {
let constant = InputNode::new(other.value().deref() * 0.0 + self);
Variable::new(
Rc::new($node::new(constant.node, other.node)),
merge_parameters(&constant.parameters, &other.parameters),
)
}
}
};
}
impl_arithmetic_op!(Add, add, AddNode);
impl_arithmetic_op!(Sub, sub, SubNode);
impl_arithmetic_op!(Mul, mul, MulNode);
impl_arithmetic_op!(Div, div, DivNode);
impl<T> Neg for Variable<T>
where
T: Node<Value = Arr, InputGradient = Arr>,
{
type Output = Variable<NegNode<T>>;
fn neg(self) -> Self::Output {
Variable::new(Rc::new(NegNode::new(self.node)), self.parameters.clone())
}
}
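/// Compare gradients from backpropagation against central finite difference
/// estimates: returns `(finite_difference_estimate, backprop_gradient)` for
/// the sum of `output`'s elements with respect to `input`. Used by the tests
/// below to verify gradient computations.
///
/// A typical check, as a sketch (marked `ignore`; assumes the helpers in
/// this file are in scope):
///
/// ```ignore
/// let mut x = ParameterNode::new(nn::xavier_normal(10, 5));
/// let mut z = x.sigmoid();
/// let (estimate, gradient) = finite_difference(&mut x, &mut z);
/// assert_close(&estimate, &gradient, 0.05);
/// ```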
pub fn finite_difference<T>(
input: &mut Variable<ParameterNode>,
output: &mut Variable<T>,
) -> (Arr, Arr)
where
T: Node<Value = Arr, InputGradient = Arr>,
{
let delta_x = 1e-4;
let initial_input = { input.value().clone() };
let mut central_difference = &initial_input * 0.0;
for (idx, diff) in central_difference.indexed_iter_mut() {
let positive_difference = {
output.zero_gradient();
let mut changed_input = initial_input.clone();
changed_input[idx] += 0.5 * delta_x;
input.set_value(&changed_input);
output.forward();
output.backward(1.0);
output.value().clone()
};
let negative_difference = {
output.zero_gradient();
let mut changed_input = initial_input.clone();
changed_input[idx] -= 0.5 * delta_x;
input.set_value(&changed_input);
output.forward();
output.backward(1.0);
output.value().clone()
};
            let difference = positive_difference - negative_difference;
            *diff = difference.scalar_sum() / delta_x;
}
let gradient = {
output.zero_gradient();
input.set_value(&initial_input);
output.forward();
output.backward(1.0);
input.gradient()
};
output.zero_gradient();
(central_difference, gradient)
}
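/// Assert that two arrays are element-wise within `tol` of each other,
/// panicking with a readable message otherwise.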
pub fn assert_close(x: &Arr, y: &Arr, tol: f32) {
assert!(
x.all_close(y, tol),
"{:#?} not within {} of {:#?}",
x,
tol,
y
);
}
#[cfg(test)]
mod tests {
use ndarray::arr2;
use optim::{Adagrad, Optimizer, SGD};
use rand::distributions::{Distribution, Uniform};
use rand::Rng;
use rayon::prelude::*;
use std::sync::Arc;
use super::optim::Synchronizable;
use super::*;
const TOLERANCE: f32 = 0.05;
fn random_matrix(rows: usize, cols: usize) -> Arr {
nn::xavier_normal(rows, cols)
}
fn random_index(rows: usize) -> usize {
Uniform::new(0, rows).sample(&mut rand::thread_rng())
}
#[test]
fn test_constant_sub() {
let mut x = ParameterNode::new(Arr::zeros((10, 10)) + 1.0);
let mut y = (1.0 - x.clone()) * 2.0;
assert_eq!(y.value().scalar_sum(), 0.0);
y.zero_gradient();
y.forward();
y.backward(1.0);
assert_eq!(y.value().scalar_sum(), 0.0);
let (difference, gradient) = finite_difference(&mut x, &mut y);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn parameter_deduplication() {
let x = ParameterNode::new(random_matrix(1, 1));
let y = ParameterNode::new(random_matrix(1, 1));
let z = x + y;
let z = z.clone() + z.clone();
assert_eq!(z.parameters().len(), 2);
}
#[test]
fn add_finite_difference() {
let mut x = ParameterNode::new(random_matrix(1, 1));
let mut y = ParameterNode::new(random_matrix(1, 1));
let mut z = x.clone() + y.clone() + x.clone() + x.clone();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn sub_finite_difference() {
let mut x = ParameterNode::new(random_matrix(1, 1));
let mut y = ParameterNode::new(random_matrix(1, 1));
let z = x.clone() - (y.clone() - x.clone());
let mut z = z.clone() * 2.0 + z.clone().sigmoid();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn mul_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 10));
let mut y = ParameterNode::new(random_matrix(10, 10));
let z = x.clone() * y.clone();
let mut z = z.clone() + z.clone();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn div_finite_difference() {
let mut x = ParameterNode::new(random_matrix(1, 1));
let y = ParameterNode::new(random_matrix(1, 1));
let mut z = (x.clone() + x.clone()) / y.clone();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn vector_dot_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut y = ParameterNode::new(random_matrix(10, 5));
let z = x.vector_dot(&y);
let mut z = z.clone() + z.clone();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn dot_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut y = ParameterNode::new(random_matrix(5, 10));
let mut z = (x.clone() + x.clone()).dot(&y);
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn dot_accumulation_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut y = ParameterNode::new(random_matrix(5, 10));
let z = x.clone().dot(&y);
let mut v = z.clone() * z.clone();
let (difference, gradient) = finite_difference(&mut x, &mut v);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut v);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn square_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut z = x.square();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn ln_finite_difference() {
let mut x = ParameterNode::new(random_matrix(2, 2));
let mut z = (x.clone() + x.clone()).exp().ln();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn tanh_finite_difference() {
let mut x = ParameterNode::new(random_matrix(2, 2));
let mut z = (x.clone() + x.clone()).tanh();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn sum_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut z = (x.clone() + x.clone()).scalar_sum();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE * 2.0);
}
#[test]
fn squared_sum_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut z = x.square().scalar_sum();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn transpose_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut z = (x.clone() + x.clone()).t();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn exp_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut z = (x.clone() + x.clone()).exp();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn dot_square_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let y = ParameterNode::new(random_matrix(10, 5));
let mut z = x.vector_dot(&y).square();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn sigmoid_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let z = (x.clone() + x.clone()).sigmoid();
let mut z = z.clone() + z.clone();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn relu_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let z = (x.clone() + x.clone()).relu();
let mut z = z * 3.0;
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn neg_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut z = -(x.clone() + x.clone());
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn softmax_finite_difference() {
let mut x = ParameterNode::new(random_matrix(1, 10));
let mut z = (x.clone() + x.clone()).softmax();
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn log_softmax_finite_difference() {
let mut x = ParameterNode::new(random_matrix(1, 10));
let mut z = (x.clone() + x.clone()).log_softmax();
let v = (x.clone() + x.clone()).softmax().ln();
assert_close(v.value().deref(), z.value().deref(), TOLERANCE);
let (finite_difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn sparse_categorical_cross_entropy_finite_difference() {
let mut x = ParameterNode::new(random_matrix(1, 10));
let z = x.clone() + x.clone();
        let idx = IndexInputNode::new(&[0]);
let mut loss = nn::losses::sparse_categorical_crossentropy(&z, &idx);
let (finite_difference, gradient) = finite_difference(&mut x, &mut loss);
assert_close(&finite_difference, &gradient, TOLERANCE);
}
#[test]
fn rowwise_stack_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut y = ParameterNode::new(random_matrix(10, 5));
let z = x.stack(&y, ndarray::Axis(0));
let mut z = z.clone().sigmoid() * z.clone().relu();
assert_eq!(z.value().rows(), 20);
assert_eq!(z.value().cols(), 5);
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn columnwise_stack_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 5));
let mut y = ParameterNode::new(random_matrix(10, 5));
let mut z = x.stack(&y, ndarray::Axis(1)).sigmoid();
assert_eq!(z.value().rows(), 10);
assert_eq!(z.value().cols(), 10);
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
let (difference, gradient) = finite_difference(&mut y, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn columnwise_view_finite_difference() {
let mut x = ParameterNode::new(random_matrix(10, 30));
let x_0 = x.slice(s![.., 0..10]);
let x_1 = x.slice(s![.., 10..20]);
let x_2 = x.slice(s![.., 20..30]);
assert_eq!(x_0.value().rows(), 10);
assert_eq!(x_0.value().cols(), 10);
assert_eq!(x_1.value().rows(), 10);
assert_eq!(x_1.value().cols(), 10);
assert_eq!(x_2.value().rows(), 10);
assert_eq!(x_2.value().cols(), 10);
let mut z = (x_0 + x_1 + x_2).sigmoid();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
#[test]
fn sparse_index_finite_difference() {
let mut x = ParameterNode::new(random_matrix(100, 5));
for _ in 0..10 {
let idx_0 = IndexInputNode::new(&[random_index(10)]);
let idx_1 = IndexInputNode::new(&[random_index(10)]);
let mut z = (x.index(&idx_0).tanh() * x.index(&idx_1)).square();
let (difference, gradient) = finite_difference(&mut x, &mut z);
assert_close(&difference, &gradient, TOLERANCE);
}
}
#[test]
fn univariate_regression() {
let slope = ParameterNode::new(random_matrix(1, 1));
let intercept = ParameterNode::new(random_matrix(1, 1));
let num_epochs = 200;
let x = InputNode::new(random_matrix(1, 1));
let y = InputNode::new(random_matrix(1, 1));
let y_hat = slope.clone() * x.clone() + intercept.clone();
let diff = y.clone() - y_hat.clone();
let mut loss = diff.square();
let optimizer = Adagrad::new().learning_rate(0.5);
for _ in 0..num_epochs {
let _x = arr2(&[[rand::thread_rng().gen()]]);
let _y = 0.5 * &_x + 0.2;
x.set_value(&_x);
y.set_value(&_y);
loss.forward();
loss.backward(1.0);
optimizer.step(loss.parameters());
}
println!(
"Predicted: {} Loss: {} Slope {} Intercept {}",
y_hat.value(),
loss.value(),
slope.value(),
intercept.value()
);
assert!(loss.value().scalar_sum() < 1.0e-2);
}
#[test]
fn multivariate_regression() {
let slope = ParameterNode::new(random_matrix(1, 3));
let intercept = ParameterNode::new(random_matrix(1, 1));
let num_epochs = 200;
let coefficients = arr2(&[[1.0], [2.0], [3.0]]);
let x = InputNode::new(random_matrix(1, 3));
let y = InputNode::new(random_matrix(1, 1));
let y_hat = x.vector_dot(&slope) + intercept.clone();
let diff = y.clone() - y_hat.clone();
let mut loss = diff.square();
let optimizer = SGD::new().learning_rate(0.1);
for _ in 0..num_epochs {
let _x = arr2(&[[
rand::thread_rng().gen(),
rand::thread_rng().gen(),
rand::thread_rng().gen(),
]]);
let _y = &_x.dot(&coefficients) + 5.0;
x.set_value(&_x);
y.set_value(&_y);
loss.forward();
loss.backward(1.0);
optimizer.step(loss.parameters());
}
println!(
"Predicted: {} Loss: {} Slope {} Intercept {}",
y_hat.value(),
loss.value(),
slope.value(),
intercept.value()
);
assert!(loss.value().scalar_sum() < 1.0e-1);
}
#[test]
fn embedding_factorization() {
let (rows, cols) = (10, 4);
let true_u = random_matrix(rows, 10);
let true_v = random_matrix(cols, 10);
let x = true_u.dot(&true_v.t());
let y = random_matrix(1, 1);
let u_input = vec![0];
let v_input = vec![0];
let output = InputNode::new(y);
let u_embedding = ParameterNode::new(random_matrix(rows, 10));
let v_embedding = ParameterNode::new(random_matrix(cols, 10));
let u_index = IndexInputNode::new(&u_input);
let v_index = IndexInputNode::new(&v_input);
let u_vec = u_embedding.index(&u_index);
let v_vec = v_embedding.index(&v_index);
let y_hat = u_vec.vector_dot(&v_vec);
let mut loss = (output.clone() - y_hat.clone()).square();
let num_epochs = 200;
let optimizer = Adagrad::new().learning_rate(0.1);
let mut loss_val = 0.0;
for _ in 0..num_epochs {
loss_val = 0.0;
for row_idx in 0..rows {
for col_idx in 0..cols {
u_index.set_value(row_idx);
v_index.set_value(col_idx);
output.set_value(x[(row_idx, col_idx)]);
loss.forward();
loss.backward(1.0);
loss_val += loss.value().scalar_sum();
optimizer.step(loss.parameters());
}
}
println!("Loss {}", loss_val)
}
assert!(loss_val < 1e-2);
}
#[test]
fn hogwild_embedding_factorization() {
let (rows, cols) = (10, 4);
let true_u = random_matrix(rows, 10);
let true_v = random_matrix(cols, 10);
let x = true_u.dot(&true_v.t());
let u_input = vec![0];
let v_input = vec![0];
let u_parameters = Arc::new(HogwildParameter::new(random_matrix(rows, 10)));
let v_parameters = Arc::new(HogwildParameter::new(random_matrix(cols, 10)));
let losses: Vec<f32> = (0..rayon::current_num_threads())
.into_par_iter()
.map(|_| {
let u_embedding = ParameterNode::shared(u_parameters.clone());
let v_embedding = ParameterNode::shared(v_parameters.clone());
let u_index = IndexInputNode::new(&u_input);
let v_index = IndexInputNode::new(&v_input);
let output = InputNode::new(random_matrix(1, 1));
let u_vec = u_embedding.index(&u_index);
let v_vec = v_embedding.index(&v_index);
let y_hat = u_vec.vector_dot(&v_vec);
let mut loss = (output.clone() - y_hat.clone()).square();
let num_epochs = 100;
let optimizer = SGD::new();
let mut loss_val = 0.0;
for _ in 0..num_epochs {
loss_val = 0.0;
for row_idx in 0..rows {
for col_idx in 0..cols {
u_index.set_value(row_idx);
v_index.set_value(col_idx);
output.set_value(x[(row_idx, col_idx)]);
loss.forward();
loss.backward(1.0);
loss_val += loss.value().scalar_sum();
optimizer.step(loss.parameters());
}
}
}
println!("Loss val {}", loss_val);
loss_val
})
.collect();
let sum_loss: f32 = losses.iter().sum();
assert!(sum_loss / (losses.len() as f32) < 1e-3);
}
#[test]
fn synchronized_embedding_factorization() {
let (rows, cols) = (10, 4);
let true_u = random_matrix(rows, 10);
let true_v = random_matrix(cols, 10);
let x = true_u.dot(&true_v.t());
let u_input = vec![0];
let v_input = vec![0];
let u_parameters = Arc::new(HogwildParameter::new(random_matrix(rows, 10)));
let v_parameters = Arc::new(HogwildParameter::new(random_matrix(cols, 10)));
let optimizer = SGD::new();
let losses: Vec<f32> = optimizer
.synchronized(rayon::current_num_threads())
.into_par_iter()
.map(|optimizer| {
let u_embedding = ParameterNode::shared(u_parameters.clone());
let v_embedding = ParameterNode::shared(v_parameters.clone());
let u_index = IndexInputNode::new(&u_input);
let v_index = IndexInputNode::new(&v_input);
let output = InputNode::new(random_matrix(1, 1));
let u_vec = u_embedding.index(&u_index);
let v_vec = v_embedding.index(&v_index);
let y_hat = u_vec.vector_dot(&v_vec);
let mut loss = (output.clone() - y_hat.clone()).square();
let num_epochs = 100;
let mut loss_val = 0.0;
for _ in 0..num_epochs {
loss_val = 0.0;
for row_idx in 0..rows {
for col_idx in 0..cols {
u_index.set_value(row_idx);
v_index.set_value(col_idx);
output.set_value(x[(row_idx, col_idx)]);
loss.forward();
loss.backward(1.0);
loss_val += loss.value().scalar_sum();
optimizer.step(loss.parameters());
}
}
}
println!("Loss val {}", loss_val);
loss_val
})
.collect();
let sum_loss: f32 = losses.iter().sum();
assert!(sum_loss / (losses.len() as f32) < 1e-3);
}
}