use std::marker::PhantomData;
use cuda_runtime_sys::dim3;
use libc::c_uint;
use crate::device::{Device, DeviceCpu, DeviceGpu, DeviceMemoryPool};
use crate::{UnitValue};
use crate::arr::ShieldSlice;
use crate::cuda::{CudaMemoryPoolPtr, CudaMutPtr, kernel, Kernel};
use crate::cuda::kernel::optimizer::{AdagradArgs, AdamArgs, MomentumSGDArgs, RMSpropArgs, SGDArgs};
use crate::error::{OptimizerBuildError, TrainingError};
/// Factory for optimizers working on unit type `U` and device `D`.
///
/// A builder holds the hyper-parameters and produces a fresh optimizer on each
/// call to [`build`](OptimizerBuilder::build).
pub trait OptimizerBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
/// Concrete optimizer type produced by this builder.
type Output: Optimizer<U,D>;
/// Builds an optimizer from `size`.
///
/// The builders in this file forward `size` to the optimizer constructor, where it
/// is stored as the element count and used as the length of any state buffers.
///
/// # Errors
/// Returns an [`OptimizerBuildError`] when the optimizer cannot be constructed.
fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError>;
}
/// A parameter-update rule for unit type `U` running on device `D`.
pub trait Optimizer<U,D> where U: Clone + Copy + UnitValue<U>, D: Device<U> {
/// Type of the borrowed input `e` passed to [`update`](Optimizer::update).
/// The implementations in this file use it as the gradient term of the update.
type InternalType: ?Sized;
/// Type of the writable handle `w` through which the weights are updated.
type InternalUpdateType<'a>: ?Sized;
/// Applies one update step to the weights behind `w`, driven by `e`.
///
/// # Errors
/// Returns a [`TrainingError`] when the update cannot be carried out.
fn update<'a>(&mut self, e:&'a Self::InternalType, w:Self::InternalUpdateType<'a>) -> Result<(),TrainingError>;
}
/// Maps an optimizer type to the storage type of its state buffers on device `D`.
///
/// The stateful optimizers in this file declare their buffer fields as
/// `<Self as OptimizerState<U,D>>::Type`, so the same struct can hold a different
/// buffer type per device.
pub trait OptimizerState<U,D> where U: Clone + Copy + UnitValue<U>, D: Device<U> {
/// Storage type used for each state buffer.
type Type;
}
/// Stochastic gradient descent optimizer with optional weight decay.
///
/// The type carries no per-element state; `D` only selects which `Optimizer`
/// implementation applies.
pub struct SGD<U,D> where U: UnitValue<U>, D: Device<U> {
/// Marker for the device type `D`; no device value is stored.
d:PhantomData<D>,
/// Element count handed to the GPU kernel arguments and used to size its launch grid.
size: usize,
/// Learning rate.
lr:U,
/// Weight-decay coefficient; the CPU update adds `weight_decay * w` to the gradient term.
weight_decay:U
}
impl<U,D> SGD<U,D> where U: UnitValue<U>, D: Device<U> {
    /// Creates an SGD optimizer for `size` elements with learning rate `lr`.
    ///
    /// The weight-decay coefficient is set to `U::default()`.
    pub fn new(size: usize,lr:U) -> SGD<U,D> {
        Self::with_params(size, lr, U::default())
    }

    /// Creates an SGD optimizer for `size` elements with an explicit learning rate
    /// and weight-decay coefficient.
    pub fn with_params(size: usize,lr:U,weight_decay:U) -> SGD<U,D> {
        SGD {
            d: PhantomData,
            size,
            lr,
            weight_decay,
        }
    }
}
impl<U> Optimizer<U,DeviceCpu<U>> for SGD<U,DeviceCpu<U>> where U: UnitValue<U>, DeviceCpu<U>: Device<U> {
    type InternalType = [U];
    type InternalUpdateType<'a> = ShieldSlice<'a,U>;

    /// Applies `w <- w - lr * (e + weight_decay * w)` element by element.
    ///
    /// Elements are paired with `zip`, so only as many weights as the shorter of
    /// `w` and `e` are touched. Always returns `Ok(())`.
    #[inline]
    fn update<'a>(&mut self, e: &'a [U], mut w: Self::InternalUpdateType<'a>) -> Result<(),TrainingError> {
        let (lr, decay) = (self.lr, self.weight_decay);

        w.iter_mut().zip(e.iter()).for_each(|(weight, &grad)| {
            // Gradient with the weight-decay term folded in.
            let penalized = grad + decay * *weight;
            *weight = *weight - lr * penalized;
        });

        Ok(())
    }
}
impl<U> Optimizer<U,DeviceGpu<U>> for SGD<U,DeviceGpu<U>>
    where U: UnitValue<U>, DeviceGpu<U>: Device<U>,
          for<'a> kernel::optimizer::SGD<'a,U>: Kernel<Args=SGDArgs<'a,U>> {
    type InternalType = CudaMemoryPoolPtr<U>;
    type InternalUpdateType<'a> = CudaMutPtr<'a,CudaMemoryPoolPtr<U>>;

    /// Runs one SGD step on the device by launching `kernel::optimizer::SGD`.
    ///
    /// The kernel receives `w`, `e`, the element count, the learning rate and the
    /// weight-decay coefficient. It is launched with 1024 threads per block and
    /// enough blocks to cover `self.size` elements.
    /// NOTE(review): the arithmetic is in the kernel source, which is not visible
    /// here; presumably it matches the CPU implementation — confirm.
    ///
    /// # Errors
    /// Propagates any error returned by the kernel launch.
    #[inline]
    fn update<'a>(&mut self, e: &'a CudaMemoryPoolPtr<U>, w: CudaMutPtr<'a,CudaMemoryPoolPtr<U>>) -> Result<(),TrainingError> {
        const THREADS_PER_BLOCK: usize = 1024;

        let mut w = w;
        // Round up in `usize` *before* narrowing to `c_uint`: casting `size` first
        // would drop its high bits, and the `+ 1023` could overflow a 32-bit value.
        let blocks = self.size / THREADS_PER_BLOCK + usize::from(self.size % THREADS_PER_BLOCK != 0);

        let mut args = SGDArgs::new(&mut w,e,self.size,self.lr,self.weight_decay);
        let mut kernel = kernel::optimizer::SGD::<'_,U>::new();

        // The block count only exceeds `c_uint` for sizes beyond 2^42 elements.
        kernel.launch(dim3 { x: blocks as c_uint, y: 1, z: 1 },
                      dim3 { x: THREADS_PER_BLOCK as c_uint, y: 1, z: 1 },
                      &mut args, 0)?;

        Ok(())
    }
}
/// Builder holding the hyper-parameters used to create [`SGD`] optimizers.
pub struct SGDBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
/// Marker for the device type `D`; no device value is stored.
d:PhantomData<D>,
/// Learning rate given to built optimizers.
lr:U,
/// Weight-decay coefficient given to built optimizers.
weight_decay:U
}
impl<U,D> SGDBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
    /// Creates a builder with learning rate `0.001` and weight decay `U::default()`.
    ///
    /// The device reference is used only to select the type `D`.
    ///
    /// # Panics
    /// Panics if `0.001` cannot be converted to `U`.
    pub fn new(_:&D) -> SGDBuilder<U,D> {
        let lr = U::from_f64(0.001).expect("Error in type conversion from f64.");

        SGDBuilder {
            d: PhantomData,
            lr,
            weight_decay: U::default(),
        }
    }

    /// Returns the builder with the learning rate replaced by `lr`.
    pub fn lr(self,lr:U) -> SGDBuilder<U,D> {
        SGDBuilder { lr, ..self }
    }

    /// Returns the builder with the weight-decay coefficient replaced by `weight_decay`.
    pub fn weight_decay(self,weight_decay:U) -> SGDBuilder<U,D> {
        SGDBuilder { weight_decay, ..self }
    }
}
impl<U,D> OptimizerBuilder<U,D> for SGDBuilder<U,D>
    where U: UnitValue<U>, D: Device<U>, SGD<U,D>: Optimizer<U,D> {
    type Output = SGD<U,D>;

    /// Builds an [`SGD`] optimizer for `size` elements from the stored learning
    /// rate and weight decay. Never fails.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let optimizer = SGD::<U,D>::with_params(size, self.lr, self.weight_decay);
        Ok(optimizer)
    }
}
/// SGD with momentum and optional weight decay.
///
/// The storage type of the velocity buffer depends on the device and is chosen
/// through the [`OptimizerState`] implementation for the concrete `D`.
pub struct MomentumSGD<U,D>
where U: UnitValue<U>, D: Device<U>,
Self: OptimizerState<U,D> {
/// Marker for the device type `D`; no device value is stored.
d:PhantomData<D>,
/// Element count; also the length requested for `vt` at construction.
size:usize,
/// Learning rate.
lr:U,
/// Momentum coefficient applied to the previous velocity.
mu:U,
/// Weight-decay coefficient.
weight_decay:U,
/// Velocity buffer, initialised with `U::default()` values.
vt:<Self as OptimizerState<U,D>>::Type
}
impl<U> MomentumSGD<U,DeviceCpu<U>> where U: UnitValue<U> {
    /// Creates a CPU momentum-SGD optimizer with momentum `0.9` and weight decay
    /// `U::default()`.
    ///
    /// # Panics
    /// Panics if `0.9` cannot be converted to `U`.
    pub fn new(device:&DeviceCpu<U>,size:usize,lr:U) -> MomentumSGD<U,DeviceCpu<U>> {
        let mu = U::from_f64(0.9).expect("Error in type conversion from f64.");
        Self::with_params(device, size, lr, mu, U::default())
    }

    /// Creates a CPU momentum-SGD optimizer with explicit hyper-parameters.
    ///
    /// The velocity buffer holds `size` elements, each `U::default()`. The device
    /// reference is not used.
    pub fn with_params(_:&DeviceCpu<U>,size:usize,lr:U,mu:U,weight_decay:U) -> MomentumSGD<U,DeviceCpu<U>> {
        let vt = vec![U::default();size].into_boxed_slice();

        MomentumSGD {
            d: PhantomData,
            size,
            lr,
            mu,
            weight_decay,
            vt,
        }
    }
}
impl<U> Optimizer<U,DeviceCpu<U>> for MomentumSGD<U,DeviceCpu<U>> where U: UnitValue<U> {
    type InternalType = [U];
    type InternalUpdateType<'a> = ShieldSlice<'a,U>;

    /// Applies one momentum-SGD step element by element:
    ///
    /// `vt <- mu * vt - lr * (e + weight_decay * w)`, then `w <- w + vt`.
    ///
    /// Elements are paired with `zip`, so iteration stops at the shortest of `w`,
    /// `e` and the velocity buffer. Always returns `Ok(())`.
    #[inline]
    fn update<'a>(&mut self, e: &[U], mut w: Self::InternalUpdateType<'a>) -> Result<(),TrainingError> {
        let (lr, mu, decay) = (self.lr, self.mu, self.weight_decay);
        let velocities = self.vt.iter_mut();

        for ((weight, &grad), velocity) in w.iter_mut().zip(e.iter()).zip(velocities) {
            let penalized = grad + decay * *weight;
            *velocity = mu * *velocity - lr * penalized;
            *weight = *weight + *velocity;
        }

        Ok(())
    }
}
impl<U> MomentumSGD<U,DeviceGpu<U>> where U: UnitValue<U>, DeviceGpu<U>: Device<U> {
    /// Creates a GPU momentum-SGD optimizer with momentum `0.9` and weight decay
    /// `U::default()`.
    ///
    /// # Errors
    /// Fails when the velocity buffer cannot be obtained from the device memory pool.
    ///
    /// # Panics
    /// Panics if `0.9` cannot be converted to `U`.
    pub fn new(device:&DeviceGpu<U>,size:usize,lr:U)
        -> Result<MomentumSGD<U,DeviceGpu<U>>,OptimizerBuildError> {
        let mu = U::from_f64(0.9).expect("Error in type conversion from f64.");
        Self::with_params(device, size, lr, mu, U::default())
    }

    /// Creates a GPU momentum-SGD optimizer with explicit hyper-parameters.
    ///
    /// The velocity buffer of `size` elements is requested from the device memory
    /// pool with `Default::default` as the element initializer.
    ///
    /// # Errors
    /// Fails when that buffer cannot be created.
    pub fn with_params(device:&DeviceGpu<U>,size:usize,lr:U,mu:U,weight_decay:U)
        -> Result<MomentumSGD<U,DeviceGpu<U>>,OptimizerBuildError> {
        let vt = CudaMemoryPoolPtr::<U>::with_initializer(size,device.get_memory_pool(),Default::default)?;

        Ok(MomentumSGD {
            d: PhantomData,
            size,
            lr,
            mu,
            weight_decay,
            vt,
        })
    }
}
impl<U> Optimizer<U,DeviceGpu<U>> for MomentumSGD<U,DeviceGpu<U>>
    where U: UnitValue<U>,
          DeviceGpu<U>: Device<U>,
          for<'a> kernel::optimizer::MomentumSGD<'a,U>: Kernel<Args=MomentumSGDArgs<'a,U>> {
    type InternalType = CudaMemoryPoolPtr<U>;
    type InternalUpdateType<'a> = CudaMutPtr<'a,CudaMemoryPoolPtr<U>>;

    /// Runs one momentum-SGD step on the device by launching
    /// `kernel::optimizer::MomentumSGD`.
    ///
    /// The kernel receives `w`, `e`, the element count, the hyper-parameters and the
    /// velocity buffer. It is launched with 1024 threads per block and enough blocks
    /// to cover `self.size` elements.
    /// NOTE(review): the arithmetic is in the kernel source, which is not visible
    /// here; presumably it matches the CPU implementation — confirm.
    ///
    /// # Errors
    /// Propagates any error returned by the kernel launch.
    #[inline]
    fn update<'a>(&mut self, e: &CudaMemoryPoolPtr<U>, w: CudaMutPtr<'a,CudaMemoryPoolPtr<U>>) -> Result<(),TrainingError> {
        const THREADS_PER_BLOCK: usize = 1024;

        let mut w = w;
        // Round up in `usize` *before* narrowing to `c_uint`: casting `size` first
        // would drop its high bits, and the `+ 1023` could overflow a 32-bit value.
        let blocks = self.size / THREADS_PER_BLOCK + usize::from(self.size % THREADS_PER_BLOCK != 0);

        let mut args = MomentumSGDArgs::new(&mut w,e,self.size,self.lr,self.mu,self.weight_decay,&mut self.vt);
        let mut kernel = kernel::optimizer::MomentumSGD::<'_,U>::new();

        // The block count only exceeds `c_uint` for sizes beyond 2^42 elements.
        kernel.launch(dim3 { x: blocks as c_uint, y: 1, z: 1 },
                      dim3 { x: THREADS_PER_BLOCK as c_uint, y: 1, z: 1 },
                      &mut args, 0)?;

        Ok(())
    }
}
/// On the CPU, the `MomentumSGD` velocity buffer is a boxed slice of `U`.
impl<U> OptimizerState<U,DeviceCpu<U>> for MomentumSGD<U,DeviceCpu<U>>
where U: UnitValue<U>,
DeviceCpu<U>: Device<U> {
type Type = Box<[U]>;
}
/// On the GPU, the `MomentumSGD` velocity buffer is a `CudaMemoryPoolPtr<U>`.
impl<U> OptimizerState<U,DeviceGpu<U>> for MomentumSGD<U,DeviceGpu<U>>
where U: UnitValue<U>,
DeviceGpu<U>: Device<U> {
type Type = CudaMemoryPoolPtr<U>;
}
/// Builder holding the device and hyper-parameters used to create [`MomentumSGD`] optimizers.
pub struct MomentumSGDBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
/// Clone of the device passed to `new`; handed to the optimizer constructor on build.
device:D,
/// Learning rate given to built optimizers.
lr:U,
/// Momentum coefficient given to built optimizers.
mu:U,
/// Weight-decay coefficient given to built optimizers.
weight_decay:U
}
impl<U,D> MomentumSGDBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
    /// Creates a builder with learning rate `0.001`, momentum `0.9` and weight
    /// decay `U::default()`, keeping a clone of `device`.
    ///
    /// # Panics
    /// Panics if `0.001` or `0.9` cannot be converted to `U`.
    pub fn new(device:&D) -> MomentumSGDBuilder<U,D> {
        let device: D = device.clone();
        let lr = U::from_f64(0.001).expect("Error in type conversion from f64.");
        let mu = U::from_f64(0.9).expect("Error in type conversion from f64.");

        MomentumSGDBuilder {
            device,
            lr,
            mu,
            weight_decay: U::default(),
        }
    }

    /// Returns the builder with the learning rate replaced by `lr`.
    pub fn lr(self,lr:U) -> MomentumSGDBuilder<U,D> {
        MomentumSGDBuilder { lr, ..self }
    }

    /// Returns the builder with the weight-decay coefficient replaced by `weight_decay`.
    pub fn weight_decay(self,weight_decay:U) -> MomentumSGDBuilder<U,D> {
        MomentumSGDBuilder { weight_decay, ..self }
    }

    /// Returns the builder with the momentum coefficient replaced by `mu`.
    pub fn mu(self,mu:U) -> MomentumSGDBuilder<U,D> {
        MomentumSGDBuilder { mu, ..self }
    }
}
impl<U> OptimizerBuilder<U,DeviceCpu<U>> for MomentumSGDBuilder<U,DeviceCpu<U>>
    where U: UnitValue<U>, MomentumSGD<U,DeviceCpu<U>>: Optimizer<U,DeviceCpu<U>> {
    type Output = MomentumSGD<U,DeviceCpu<U>>;

    /// Builds a CPU [`MomentumSGD`] optimizer for `size` elements from the stored
    /// hyper-parameters. Never fails.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let optimizer = MomentumSGD::<U,DeviceCpu<U>>::with_params(
            &self.device, size, self.lr, self.mu, self.weight_decay,
        );
        Ok(optimizer)
    }
}
impl<U> OptimizerBuilder<U,DeviceGpu<U>> for MomentumSGDBuilder<U,DeviceGpu<U>>
    where U: UnitValue<U>, DeviceGpu<U>: Device<U>,
          MomentumSGD<U,DeviceGpu<U>>: Optimizer<U,DeviceGpu<U>> {
    type Output = MomentumSGD<U,DeviceGpu<U>>;

    /// Builds a GPU [`MomentumSGD`] optimizer for `size` elements from the stored
    /// hyper-parameters.
    ///
    /// # Errors
    /// Returns whatever error the optimizer constructor reports.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let device = &self.device;
        MomentumSGD::<U,DeviceGpu<U>>::with_params(device, size, self.lr, self.mu, self.weight_decay)
    }
}
/// Adagrad optimizer with optional weight decay.
///
/// The storage type of the accumulator depends on the device and is chosen
/// through the [`OptimizerState`] implementation for the concrete `D`.
pub struct Adagrad<U,D>
where U: UnitValue<U>, D: Device<U>,
Self: OptimizerState<U,D> {
/// Marker for the device type `D`; no device value is stored.
d:PhantomData<D>,
/// Element count; also the length requested for `gt` at construction.
size:usize,
/// Learning rate.
lr:U,
/// Accumulator buffer (the CPU update adds the squared gradient to it), initialised with `U::default()`.
gt:<Self as OptimizerState<U,D>>::Type,
/// Weight-decay coefficient.
weight_decay:U,
/// Small constant added to the square root in the denominator; the constructors set it to `1e-10`.
eps:U
}
impl<U> Adagrad<U,DeviceCpu<U>> where U: UnitValue<U> {
    /// Creates a CPU Adagrad optimizer with learning rate `0.01` and weight decay
    /// `U::default()`.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&DeviceCpu<U>,size:usize) -> Adagrad<U,DeviceCpu<U>> {
        let default_lr = U::from_f64(0.01).expect("Error in type conversion from f64.");
        Self::with_params(device, size, default_lr, U::default())
    }

    /// Creates a CPU Adagrad optimizer with an explicit learning rate and weight decay.
    ///
    /// The accumulator holds `size` elements, each `U::default()`; `eps` is fixed
    /// at `1e-10`. The device reference is not used.
    ///
    /// # Panics
    /// Panics if `1e-10` cannot be converted to `U`.
    pub fn with_params(_:&DeviceCpu<U>,size:usize,lr:U,weight_decay:U) -> Adagrad<U,DeviceCpu<U>> {
        let gt = vec![U::default();size].into_boxed_slice();
        let eps = U::from_f64(1e-10f64).expect("Error in type conversion from f64.");

        Adagrad {
            d: PhantomData,
            size,
            lr,
            gt,
            weight_decay,
            eps,
        }
    }
}
impl<U> Optimizer<U,DeviceCpu<U>> for Adagrad<U,DeviceCpu<U>> where U: UnitValue<U> {
    type InternalType = [U];
    type InternalUpdateType<'a> = ShieldSlice<'a,U>;

    /// Applies one Adagrad step element by element. With `g = e + weight_decay * w`:
    ///
    /// `gt <- gt + g * g`, then `w <- w - lr * (g / (sqrt(gt) + eps))`.
    ///
    /// Elements are paired with `zip`, so iteration stops at the shortest of `w`,
    /// `e` and the accumulator. Always returns `Ok(())`.
    #[inline]
    fn update<'a>(&mut self, e: &[U], mut w: Self::InternalUpdateType<'a>) -> Result<(),TrainingError> {
        let (lr, decay, eps) = (self.lr, self.weight_decay, self.eps);
        let accumulators = self.gt.iter_mut();

        for ((weight, &grad), acc) in w.iter_mut().zip(e.iter()).zip(accumulators) {
            let g = grad + decay * *weight;
            *acc += g * g;

            let step = g / (acc.sqrt() + eps);
            *weight = *weight - lr * step;
        }

        Ok(())
    }
}
impl<U> Adagrad<U,DeviceGpu<U>> where U: UnitValue<U>, DeviceGpu<U>: Device<U> {
    /// Creates a GPU Adagrad optimizer with learning rate `0.01` and weight decay
    /// `U::default()`.
    ///
    /// # Errors
    /// Fails when the accumulator buffer cannot be obtained from the device memory pool.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&DeviceGpu<U>,size:usize) -> Result<Adagrad<U,DeviceGpu<U>>,OptimizerBuildError> {
        let default_lr = U::from_f64(0.01).expect("Error in type conversion from f64.");
        Self::with_params(device, size, default_lr, U::default())
    }

    /// Creates a GPU Adagrad optimizer with an explicit learning rate and weight decay.
    ///
    /// The accumulator of `size` elements is requested from the device memory pool
    /// with `Default::default` as the element initializer; `eps` is fixed at `1e-10`.
    ///
    /// # Errors
    /// Fails when that buffer cannot be created.
    ///
    /// # Panics
    /// Panics if `1e-10` cannot be converted to `U`.
    pub fn with_params(device:&DeviceGpu<U>,size:usize,lr:U,weight_decay:U) -> Result<Adagrad<U,DeviceGpu<U>>,OptimizerBuildError> {
        let gt = CudaMemoryPoolPtr::<U>::with_initializer(size,device.get_memory_pool(),Default::default)?;
        let eps = U::from_f64(1e-10f64).expect("Error in type conversion from f64.");

        Ok(Adagrad {
            d: PhantomData,
            size,
            lr,
            gt,
            weight_decay,
            eps,
        })
    }
}
impl<U> Optimizer<U,DeviceGpu<U>> for Adagrad<U,DeviceGpu<U>>
    where U: UnitValue<U>,
          DeviceGpu<U>: Device<U>,
          for<'a> kernel::optimizer::Adagrad<'a,U>: Kernel<Args=AdagradArgs<'a,U>> {
    type InternalType = CudaMemoryPoolPtr<U>;
    type InternalUpdateType<'a> = CudaMutPtr<'a,CudaMemoryPoolPtr<U>>;

    /// Runs one Adagrad step on the device by launching `kernel::optimizer::Adagrad`.
    ///
    /// The kernel receives `w`, `e`, the element count, the hyper-parameters and the
    /// accumulator buffer. It is launched with 1024 threads per block and enough
    /// blocks to cover `self.size` elements.
    /// NOTE(review): the arithmetic is in the kernel source, which is not visible
    /// here; presumably it matches the CPU implementation — confirm.
    ///
    /// # Errors
    /// Propagates any error returned by the kernel launch.
    #[inline]
    fn update<'a>(&mut self, e: &'a CudaMemoryPoolPtr<U>, w: CudaMutPtr<'a,CudaMemoryPoolPtr<U>>) -> Result<(),TrainingError> {
        const THREADS_PER_BLOCK: usize = 1024;

        let mut w = w;
        // Round up in `usize` *before* narrowing to `c_uint`: casting `size` first
        // would drop its high bits, and the `+ 1023` could overflow a 32-bit value.
        let blocks = self.size / THREADS_PER_BLOCK + usize::from(self.size % THREADS_PER_BLOCK != 0);

        let mut args = AdagradArgs::new(&mut w,e,self.size,self.lr,self.weight_decay,self.eps,&mut self.gt);
        let mut kernel = kernel::optimizer::Adagrad::<'_,U>::new();

        // The block count only exceeds `c_uint` for sizes beyond 2^42 elements.
        kernel.launch(dim3 { x: blocks as c_uint, y: 1, z: 1 },
                      dim3 { x: THREADS_PER_BLOCK as c_uint, y: 1, z: 1 },
                      &mut args, 0)?;

        Ok(())
    }
}
/// On the CPU, the `Adagrad` accumulator is a boxed slice of `U`.
impl<U> OptimizerState<U,DeviceCpu<U>> for Adagrad<U,DeviceCpu<U>>
where U: UnitValue<U>,
DeviceCpu<U>: Device<U> {
type Type = Box<[U]>;
}
/// On the GPU, the `Adagrad` accumulator is a `CudaMemoryPoolPtr<U>`.
impl<U> OptimizerState<U,DeviceGpu<U>> for Adagrad<U,DeviceGpu<U>>
where U: UnitValue<U>,
DeviceGpu<U>: Device<U> {
type Type = CudaMemoryPoolPtr<U>;
}
/// Builder holding the device and hyper-parameters used to create [`Adagrad`] optimizers.
pub struct AdagradBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
/// Learning rate given to built optimizers.
lr:U,
/// Weight-decay coefficient given to built optimizers.
weight_decay:U,
/// Clone of the device passed to `new`; handed to the optimizer constructor on build.
device:D
}
impl<U,D> AdagradBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
    /// Creates a builder with learning rate `0.01` and weight decay `U::default()`,
    /// keeping a clone of `device`.
    ///
    /// # Panics
    /// Panics if `0.01` cannot be converted to `U`.
    pub fn new(device:&D) -> AdagradBuilder<U,D> {
        let lr = U::from_f64(0.01).expect("Error in type conversion from f64.");
        let weight_decay = U::default();
        let device: D = device.clone();

        AdagradBuilder { lr, weight_decay, device }
    }

    /// Returns the builder with the learning rate replaced by `lr`.
    pub fn lr(self,lr:U) -> AdagradBuilder<U,D> {
        AdagradBuilder { lr, ..self }
    }

    /// Returns the builder with the weight-decay coefficient replaced by `weight_decay`.
    pub fn weight_decay(self,weight_decay:U) -> AdagradBuilder<U,D> {
        AdagradBuilder { weight_decay, ..self }
    }
}
impl<U> OptimizerBuilder<U,DeviceCpu<U>> for AdagradBuilder<U,DeviceCpu<U>>
    where U: UnitValue<U>, Adagrad<U,DeviceCpu<U>>: Optimizer<U,DeviceCpu<U>> {
    type Output = Adagrad<U,DeviceCpu<U>>;

    /// Builds a CPU [`Adagrad`] optimizer for `size` elements from the stored
    /// hyper-parameters. Never fails.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let optimizer = Adagrad::<U,DeviceCpu<U>>::with_params(
            &self.device, size, self.lr, self.weight_decay,
        );
        Ok(optimizer)
    }
}
impl<U> OptimizerBuilder<U,DeviceGpu<U>> for AdagradBuilder<U,DeviceGpu<U>>
    where U: UnitValue<U>, DeviceGpu<U>: Device<U>,
          Adagrad<U,DeviceGpu<U>>: Optimizer<U,DeviceGpu<U>> {
    type Output = Adagrad<U,DeviceGpu<U>>;

    /// Builds a GPU [`Adagrad`] optimizer for `size` elements from the stored
    /// hyper-parameters.
    ///
    /// # Errors
    /// Returns whatever error the optimizer constructor reports.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let device = &self.device;
        Adagrad::<U,DeviceGpu<U>>::with_params(device, size, self.lr, self.weight_decay)
    }
}
/// RMSprop optimizer with optional momentum and weight decay.
///
/// The storage type of the two state buffers depends on the device and is chosen
/// through the [`OptimizerState`] implementation for the concrete `D`.
pub struct RMSprop<U,D>
where U: UnitValue<U>, D: Device<U>,
Self: OptimizerState<U,D> {
/// Marker for the device type `D`; no device value is stored.
d:PhantomData<D>,
/// Element count; also the length requested for `gt` and `bt` at construction.
size:usize,
/// Learning rate.
lr:U,
/// Weight-decay coefficient.
weight_decay:U,
/// Decay factor of the squared-gradient average held in `gt`.
alpha:U,
/// Momentum coefficient applied to `bt`.
mu:U,
/// Running average of squared gradients, initialised with `U::default()`.
gt:<Self as OptimizerState<U,D>>::Type,
/// Momentum buffer, initialised with `U::default()`.
bt:<Self as OptimizerState<U,D>>::Type,
/// Small constant added to the square root in the denominator; the constructors set it to `1e-8`.
eps:U
}
impl<U> RMSprop<U,DeviceCpu<U>> where U: UnitValue<U> {
    /// Creates a CPU RMSprop optimizer with learning rate `0.0001` and the other
    /// defaults of [`with_lr`](Self::with_lr).
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&DeviceCpu<U>,size:usize) -> RMSprop<U,DeviceCpu<U>> {
        let default_lr = U::from_f64(0.0001f64).expect("Error in type conversion from f64.");
        Self::with_lr(device, size, default_lr)
    }

    /// Creates a CPU RMSprop optimizer with learning rate `lr`, weight decay
    /// `U::default()`, `alpha = 0.9` and momentum `U::default()`.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn with_lr(device:&DeviceCpu<U>,size:usize,lr:U) -> RMSprop<U,DeviceCpu<U>> {
        let weight_decay = U::default();
        let alpha = U::from_f64(0.9f64).expect("Error in type conversion from f64.");
        let mu = U::default();

        Self::with_params(device, size, lr, weight_decay, alpha, mu)
    }

    /// Creates a CPU RMSprop optimizer with explicit hyper-parameters.
    ///
    /// Both state buffers hold `size` elements, each `U::default()`; `eps` is fixed
    /// at `1e-8`. The device reference is not used.
    ///
    /// # Panics
    /// Panics if `1e-8` cannot be converted to `U`.
    pub fn with_params(_:&DeviceCpu<U>,size:usize,lr:U,weight_decay:U,alpha:U,mu:U) -> RMSprop<U,DeviceCpu<U>> {
        let gt = vec![U::default();size].into_boxed_slice();
        let bt = vec![U::default();size].into_boxed_slice();
        let eps = U::from_f64(1e-8f64).expect("Error in type conversion from f64.");

        RMSprop {
            d: PhantomData,
            size,
            lr,
            weight_decay,
            alpha,
            mu,
            gt,
            bt,
            eps,
        }
    }
}
impl<U> Optimizer<U,DeviceCpu<U>> for RMSprop<U,DeviceCpu<U>> where U: UnitValue<U> {
    type InternalType = [U];
    type InternalUpdateType<'a> = ShieldSlice<'a,U>;

    /// Applies one RMSprop step element by element. With `g = e + weight_decay * w`:
    ///
    /// `gt <- alpha * gt + (1 - alpha) * g * g`,
    /// `bt <- mu * bt + g / (sqrt(gt) + eps)`,
    /// `w  <- w - lr * bt`.
    ///
    /// Elements are paired with `zip`, so iteration stops at the shortest of `w`,
    /// `e` and the state buffers. Always returns `Ok(())`.
    #[inline]
    fn update<'a>(&mut self, e: &'a [U], mut w: Self::InternalUpdateType<'a>) -> Result<(),TrainingError> {
        let (lr, decay, eps) = (self.lr, self.weight_decay, self.eps);
        let (alpha, mu) = (self.alpha, self.mu);
        // Loop-invariant weight of the new squared gradient.
        let one_minus_alpha = U::one() - alpha;

        let state = self.gt.iter_mut().zip(self.bt.iter_mut());

        for ((weight, &grad), (sq_avg, momentum)) in w.iter_mut().zip(e.iter()).zip(state) {
            let g = grad + decay * *weight;

            *sq_avg = alpha * *sq_avg + one_minus_alpha * g * g;
            *momentum = mu * *momentum + g / (sq_avg.sqrt() + eps);
            *weight = *weight - lr * *momentum;
        }

        Ok(())
    }
}
impl<U> RMSprop<U,DeviceGpu<U>> where U: UnitValue<U>, DeviceGpu<U>: Device<U> {
    /// Creates a GPU RMSprop optimizer with learning rate `0.0001` and the other
    /// defaults of [`with_lr`](Self::with_lr).
    ///
    /// # Errors
    /// Fails when a state buffer cannot be obtained from the device memory pool.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&DeviceGpu<U>,size:usize)
        -> Result<RMSprop<U,DeviceGpu<U>>,OptimizerBuildError> {
        let default_lr = U::from_f64(0.0001f64).expect("Error in type conversion from f64.");
        Self::with_lr(device, size, default_lr)
    }

    /// Creates a GPU RMSprop optimizer with learning rate `lr`, weight decay
    /// `U::default()`, `alpha = 0.9` and momentum `U::default()`.
    ///
    /// # Errors
    /// Fails when a state buffer cannot be obtained from the device memory pool.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn with_lr(device:&DeviceGpu<U>,size:usize,lr:U)
        -> Result<RMSprop<U,DeviceGpu<U>>,OptimizerBuildError> {
        let weight_decay = U::default();
        let alpha = U::from_f64(0.9f64).expect("Error in type conversion from f64.");
        let mu = U::default();

        Self::with_params(device, size, lr, weight_decay, alpha, mu)
    }

    /// Creates a GPU RMSprop optimizer with explicit hyper-parameters.
    ///
    /// Both state buffers of `size` elements are requested from the device memory
    /// pool with `Default::default` as the element initializer; `eps` is fixed at `1e-8`.
    ///
    /// # Errors
    /// Fails when either buffer cannot be created.
    ///
    /// # Panics
    /// Panics if `1e-8` cannot be converted to `U`.
    pub fn with_params(device:&DeviceGpu<U>,size:usize,lr:U,weight_decay:U,alpha:U,mu:U)
        -> Result<RMSprop<U,DeviceGpu<U>>,OptimizerBuildError> {
        let gt = CudaMemoryPoolPtr::<U>::with_initializer(size,device.get_memory_pool(),Default::default)?;
        let bt = CudaMemoryPoolPtr::<U>::with_initializer(size,device.get_memory_pool(),Default::default)?;
        let eps = U::from_f64(1e-8f64).expect("Error in type conversion from f64.");

        Ok(RMSprop {
            d: PhantomData,
            size,
            lr,
            weight_decay,
            alpha,
            mu,
            gt,
            bt,
            eps,
        })
    }
}
impl<U> Optimizer<U,DeviceGpu<U>> for RMSprop<U,DeviceGpu<U>>
    where U: UnitValue<U>,
          DeviceGpu<U>: Device<U>,
          for<'a> kernel::optimizer::RMSprop<'a,U>: Kernel<Args=RMSpropArgs<'a,U>> {
    type InternalType = CudaMemoryPoolPtr<U>;
    type InternalUpdateType<'a> = CudaMutPtr<'a,CudaMemoryPoolPtr<U>>;

    /// Runs one RMSprop step on the device by launching `kernel::optimizer::RMSprop`.
    ///
    /// The kernel receives `w`, `e`, the element count, the hyper-parameters and both
    /// state buffers. It is launched with 1024 threads per block and enough blocks
    /// to cover `self.size` elements.
    /// NOTE(review): the arithmetic is in the kernel source, which is not visible
    /// here; presumably it matches the CPU implementation — confirm.
    ///
    /// # Errors
    /// Propagates any error returned by the kernel launch.
    #[inline]
    fn update<'a>(&mut self, e: &'a CudaMemoryPoolPtr<U>, w: CudaMutPtr<'a,CudaMemoryPoolPtr<U>>) -> Result<(),TrainingError> {
        const THREADS_PER_BLOCK: usize = 1024;

        let mut w = w;
        // Round up in `usize` *before* narrowing to `c_uint`: casting `size` first
        // would drop its high bits, and the `+ 1023` could overflow a 32-bit value.
        let blocks = self.size / THREADS_PER_BLOCK + usize::from(self.size % THREADS_PER_BLOCK != 0);

        let mut args = RMSpropArgs::new(&mut w,e,self.size,self.lr,self.weight_decay,self.alpha,self.mu,self.eps,&mut self.gt, &mut self.bt);
        let mut kernel = kernel::optimizer::RMSprop::<'_,U>::new();

        // The block count only exceeds `c_uint` for sizes beyond 2^42 elements.
        kernel.launch(dim3 { x: blocks as c_uint, y: 1, z: 1 },
                      dim3 { x: THREADS_PER_BLOCK as c_uint, y: 1, z: 1 },
                      &mut args, 0)?;

        Ok(())
    }
}
/// On the CPU, each `RMSprop` state buffer is a boxed slice of `U`.
impl<U> OptimizerState<U,DeviceCpu<U>> for RMSprop<U,DeviceCpu<U>>
where U: UnitValue<U>,
DeviceCpu<U>: Device<U> {
type Type = Box<[U]>;
}
/// On the GPU, each `RMSprop` state buffer is a `CudaMemoryPoolPtr<U>`.
impl<U> OptimizerState<U,DeviceGpu<U>> for RMSprop<U,DeviceGpu<U>>
where U: UnitValue<U>,
DeviceGpu<U>: Device<U> {
type Type = CudaMemoryPoolPtr<U>;
}
/// Builder holding the device and hyper-parameters used to create [`RMSprop`] optimizers.
pub struct RMSpropBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
/// Learning rate given to built optimizers.
lr:U,
/// Weight-decay coefficient given to built optimizers.
weight_decay:U,
/// Decay factor of the squared-gradient average.
alpha:U,
/// Momentum coefficient.
mu:U,
/// Clone of the device passed to `new`; handed to the optimizer constructor on build.
device:D
}
impl<U,D> RMSpropBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
    /// Creates a builder with learning rate `0.0001`, weight decay `U::default()`,
    /// `alpha = 0.9` and momentum `U::default()`, keeping a clone of `device`.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&D) -> RMSpropBuilder<U,D> {
        let lr = U::from_f64(0.0001f64).expect("Error in type conversion from f64.");
        let weight_decay = U::default();
        let alpha = U::from_f64(0.9f64).expect("Error in type conversion from f64.");
        let mu = U::default();
        let device: D = device.clone();

        RMSpropBuilder { lr, weight_decay, alpha, mu, device }
    }

    /// Returns the builder with the learning rate replaced by `lr`.
    pub fn lr(self,lr:U) -> RMSpropBuilder<U,D> {
        RMSpropBuilder { lr, ..self }
    }

    /// Returns the builder with the weight-decay coefficient replaced by `weight_decay`.
    pub fn weight_decay(self,weight_decay:U) -> RMSpropBuilder<U,D> {
        RMSpropBuilder { weight_decay, ..self }
    }

    /// Returns the builder with the squared-gradient decay factor replaced by `alpha`.
    pub fn alpha(self,alpha:U) -> RMSpropBuilder<U,D> {
        RMSpropBuilder { alpha, ..self }
    }

    /// Returns the builder with the momentum coefficient replaced by `mu`.
    pub fn mu(self,mu:U) -> RMSpropBuilder<U,D> {
        RMSpropBuilder { mu, ..self }
    }
}
impl<U> OptimizerBuilder<U,DeviceCpu<U>> for RMSpropBuilder<U,DeviceCpu<U>>
    where U: UnitValue<U>, RMSprop<U,DeviceCpu<U>>: Optimizer<U,DeviceCpu<U>> {
    type Output = RMSprop<U,DeviceCpu<U>>;

    /// Builds a CPU [`RMSprop`] optimizer for `size` elements from the stored
    /// hyper-parameters. Never fails.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let optimizer = RMSprop::<U,DeviceCpu<U>>::with_params(
            &self.device, size, self.lr, self.weight_decay, self.alpha, self.mu,
        );
        Ok(optimizer)
    }
}
impl<U> OptimizerBuilder<U,DeviceGpu<U>> for RMSpropBuilder<U,DeviceGpu<U>>
    where U: UnitValue<U>, DeviceGpu<U>: Device<U>,
          RMSprop<U,DeviceGpu<U>>: Optimizer<U,DeviceGpu<U>> {
    type Output = RMSprop<U,DeviceGpu<U>>;

    /// Builds a GPU [`RMSprop`] optimizer for `size` elements from the stored
    /// hyper-parameters.
    ///
    /// # Errors
    /// Returns whatever error the optimizer constructor reports.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let device = &self.device;
        RMSprop::<U,DeviceGpu<U>>::with_params(
            device, size, self.lr, self.weight_decay, self.alpha, self.mu,
        )
    }
}
/// Adam optimizer with optional weight decay.
///
/// The storage type of the two moment buffers depends on the device and is chosen
/// through the [`OptimizerState`] implementation for the concrete `D`.
pub struct Adam<U,D>
where U: UnitValue<U>, D: Device<U>,
Self: OptimizerState<U,D> {
/// Marker for the device type `D`; no device value is stored.
d:PhantomData<D>,
/// Element count; also the length requested for `mt` and `vt` at construction.
size:usize,
/// Learning rate.
lr:U,
/// Weight-decay coefficient.
weight_decay:U,
/// First-moment buffer, initialised with `U::default()`.
mt:<Self as OptimizerState<U,D>>::Type,
/// Second-moment buffer, initialised with `U::default()`.
vt:<Self as OptimizerState<U,D>>::Type,
/// Decay rate of the first moment.
b1:U,
/// Decay rate of the second moment.
b2:U,
/// Running power of `b1`: starts at `b1` and is multiplied by `b1` after every update.
b1t:U,
/// Running power of `b2`: starts at `b2` and is multiplied by `b2` after every update.
b2t:U,
/// Small constant used in the denominator of the update; the constructors set it to `1e-8`.
eps:U
}
impl<U> Adam<U,DeviceCpu<U>> where U: UnitValue<U> {
    /// Creates a CPU Adam optimizer with learning rate `0.001` and the other
    /// defaults of [`with_lr`](Self::with_lr).
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&DeviceCpu<U>,size:usize) -> Adam<U,DeviceCpu<U>> {
        let default_lr = U::from_f64(0.001f64).expect("Error in type conversion from f64.");
        Self::with_lr(device, size, default_lr)
    }

    /// Creates a CPU Adam optimizer with learning rate `lr`, weight decay
    /// `U::default()`, `b1 = 0.9` and `b2 = 0.999`.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn with_lr(device:&DeviceCpu<U>,size:usize,lr:U) -> Adam<U,DeviceCpu<U>> {
        let weight_decay = U::default();
        let b1 = U::from_f64(0.9f64).expect("Error in type conversion from f64.");
        let b2 = U::from_f64(0.999f64).expect("Error in type conversion from f64.");

        Self::with_params(device, size, lr, weight_decay, b1, b2)
    }

    /// Creates a CPU Adam optimizer with explicit hyper-parameters.
    ///
    /// Both moment buffers hold `size` elements, each `U::default()`. The running
    /// powers start at `b1` and `b2`, and `eps` is fixed at `1e-8`. The device
    /// reference is not used.
    ///
    /// # Panics
    /// Panics if `1e-8` cannot be converted to `U`.
    pub fn with_params(_:&DeviceCpu<U>,size:usize,lr:U,weight_decay:U,b1:U,b2:U) -> Adam<U,DeviceCpu<U>> {
        let mt = vec![U::default();size].into_boxed_slice();
        let vt = vec![U::default();size].into_boxed_slice();
        let eps = U::from_f64(1e-8f64).expect("Error in type conversion from f64.");

        Adam {
            d: PhantomData,
            size,
            lr,
            weight_decay,
            mt,
            vt,
            b1,
            b2,
            b1t: b1,
            b2t: b2,
            eps,
        }
    }
}
impl<U> Optimizer<U,DeviceCpu<U>> for Adam<U,DeviceCpu<U>> where U: UnitValue<U> {
    type InternalType = [U];
    type InternalUpdateType<'a> = ShieldSlice<'a,U>;

    /// Applies one Adam step element by element. With `g = e + weight_decay * w`:
    ///
    /// `mt <- b1 * mt + (1 - b1) * g`,
    /// `vt <- b2 * vt + (1 - b2) * g * g`,
    /// `w  <- w - lr * (mt / (1 - b1t)) / sqrt(vt / (1 - b2t) + eps)`.
    ///
    /// Note that `eps` is added inside the square root. After the loop, `b1t` and
    /// `b2t` are multiplied by `b1` and `b2` for the next step.
    ///
    /// Elements are paired with `zip`, so iteration stops at the shortest of `w`,
    /// `e` and the moment buffers. Always returns `Ok(())`.
    #[inline]
    fn update<'a>(&mut self, e: &'a [U], mut w: Self::InternalUpdateType<'a>) -> Result<(),TrainingError> {
        let (lr, decay, eps) = (self.lr, self.weight_decay, self.eps);
        let (b1, b2) = (self.b1, self.b2);
        let (b1t, b2t) = (self.b1t, self.b2t);

        // Loop-invariant factors.
        let one = U::one();
        let (m_gain, v_gain) = (one - b1, one - b2);
        let (m_corr, v_corr) = (one - b1t, one - b2t);

        let moments = self.mt.iter_mut().zip(self.vt.iter_mut());

        for ((weight, &grad), (m, v)) in w.iter_mut().zip(e.iter()).zip(moments) {
            let g = grad + decay * *weight;

            *m = b1 * *m + m_gain * g;
            *v = b2 * *v + v_gain * g * g;

            let m_hat = *m / m_corr;
            let v_hat = *v / v_corr;
            *weight = *weight - lr * m_hat / (v_hat + eps).sqrt();
        }

        self.b1t = b1t * b1;
        self.b2t = b2t * b2;

        Ok(())
    }
}
impl<U> Adam<U,DeviceGpu<U>> where U: UnitValue<U>, DeviceGpu<U>: Device<U> {
    /// Creates a GPU Adam optimizer with learning rate `0.001` and the other
    /// defaults of [`with_lr`](Self::with_lr).
    ///
    /// # Errors
    /// Fails when a moment buffer cannot be obtained from the device memory pool.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&DeviceGpu<U>,size:usize) -> Result<Adam<U,DeviceGpu<U>>,OptimizerBuildError> {
        let default_lr = U::from_f64(0.001f64).expect("Error in type conversion from f64.");
        Self::with_lr(device, size, default_lr)
    }

    /// Creates a GPU Adam optimizer with learning rate `lr`, weight decay
    /// `U::default()`, `b1 = 0.9` and `b2 = 0.999`.
    ///
    /// # Errors
    /// Fails when a moment buffer cannot be obtained from the device memory pool.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn with_lr(device:&DeviceGpu<U>,size:usize,lr:U) ->Result<Adam<U,DeviceGpu<U>>,OptimizerBuildError> {
        let weight_decay = U::default();
        let b1 = U::from_f64(0.9f64).expect("Error in type conversion from f64.");
        let b2 = U::from_f64(0.999f64).expect("Error in type conversion from f64.");

        Self::with_params(device, size, lr, weight_decay, b1, b2)
    }

    /// Creates a GPU Adam optimizer with explicit hyper-parameters.
    ///
    /// Both moment buffers of `size` elements are requested from the device memory
    /// pool with `Default::default` as the element initializer. The running powers
    /// start at `b1` and `b2`, and `eps` is fixed at `1e-8`.
    ///
    /// # Errors
    /// Fails when either buffer cannot be created.
    ///
    /// # Panics
    /// Panics if `1e-8` cannot be converted to `U`.
    pub fn with_params(device:&DeviceGpu<U>,size:usize,lr:U,weight_decay:U,b1:U,b2:U) ->Result<Adam<U,DeviceGpu<U>>,OptimizerBuildError> {
        let mt = CudaMemoryPoolPtr::<U>::with_initializer(size,device.get_memory_pool(),Default::default)?;
        let vt = CudaMemoryPoolPtr::<U>::with_initializer(size,device.get_memory_pool(),Default::default)?;
        let eps = U::from_f64(1e-8f64).expect("Error in type conversion from f64.");

        Ok(Adam {
            d: PhantomData,
            size,
            lr,
            weight_decay,
            mt,
            vt,
            b1,
            b2,
            b1t: b1,
            b2t: b2,
            eps,
        })
    }
}
impl<U> Optimizer<U,DeviceGpu<U>> for Adam<U,DeviceGpu<U>>
    where U: UnitValue<U>,
          DeviceGpu<U>: Device<U>,
          for<'a> kernel::optimizer::Adam<'a,U>: Kernel<Args=AdamArgs<'a,U>> {
    type InternalType = CudaMemoryPoolPtr<U>;
    type InternalUpdateType<'a> = CudaMutPtr<'a,CudaMemoryPoolPtr<U>>;

    /// Runs one Adam step on the device by launching `kernel::optimizer::Adam`.
    ///
    /// The kernel receives `w`, `e`, the element count, the hyper-parameters, both
    /// moment buffers and the current running powers `b1t`/`b2t`. It is launched
    /// with 1024 threads per block and enough blocks to cover `self.size` elements.
    /// After a successful launch, `b1t` and `b2t` are multiplied by `b1` and `b2`;
    /// they are left untouched when the launch fails.
    /// NOTE(review): the arithmetic is in the kernel source, which is not visible
    /// here; presumably it matches the CPU implementation — confirm.
    ///
    /// # Errors
    /// Propagates any error returned by the kernel launch.
    #[inline]
    fn update<'a>(&mut self, e: &'a CudaMemoryPoolPtr<U>, w: CudaMutPtr<'a,CudaMemoryPoolPtr<U>>) -> Result<(),TrainingError> {
        const THREADS_PER_BLOCK: usize = 1024;

        let mut w = w;
        // Round up in `usize` *before* narrowing to `c_uint`: casting `size` first
        // would drop its high bits, and the `+ 1023` could overflow a 32-bit value.
        let blocks = self.size / THREADS_PER_BLOCK + usize::from(self.size % THREADS_PER_BLOCK != 0);

        let mut args = AdamArgs::new(&mut w,e,self.size,self.lr,self.weight_decay,self.eps,
                                     &mut self.mt,&mut self.vt,
                                     self.b1,self.b2,self.b1t,self.b2t);
        let mut kernel = kernel::optimizer::Adam::<'_,U>::new();

        // The block count only exceeds `c_uint` for sizes beyond 2^42 elements.
        kernel.launch(dim3 { x: blocks as c_uint, y: 1, z: 1 },
                      dim3 { x: THREADS_PER_BLOCK as c_uint, y: 1, z: 1 },
                      &mut args, 0)?;

        self.b1t = self.b1t * self.b1;
        self.b2t = self.b2t * self.b2;

        Ok(())
    }
}
/// On the CPU, each `Adam` moment buffer is a boxed slice of `U`.
impl<U> OptimizerState<U,DeviceCpu<U>> for Adam<U,DeviceCpu<U>>
where U: UnitValue<U>,
DeviceCpu<U>: Device<U> {
type Type = Box<[U]>;
}
/// On the GPU, each `Adam` moment buffer is a `CudaMemoryPoolPtr<U>`.
impl<U> OptimizerState<U,DeviceGpu<U>> for Adam<U,DeviceGpu<U>>
where U: UnitValue<U>,
DeviceGpu<U>: Device<U> {
type Type = CudaMemoryPoolPtr<U>;
}
/// Builder holding the device and hyper-parameters used to create [`Adam`] optimizers.
pub struct AdamBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
/// Learning rate given to built optimizers.
lr:U,
/// Weight-decay coefficient given to built optimizers.
weight_decay:U,
/// Decay rate of the first moment.
b1:U,
/// Decay rate of the second moment.
b2:U,
/// Clone of the device passed to `new`; handed to the optimizer constructor on build.
device:D
}
impl<U,D> AdamBuilder<U,D> where U: UnitValue<U>, D: Device<U> {
    /// Creates a builder with learning rate `0.001`, weight decay `U::default()`,
    /// `b1 = 0.9` and `b2 = 0.999`, keeping a clone of `device`.
    ///
    /// # Panics
    /// Panics if a default constant cannot be converted to `U`.
    pub fn new(device:&D) -> AdamBuilder<U,D> {
        let lr = U::from_f64(0.001f64).expect("Error in type conversion from f64.");
        let weight_decay = U::default();
        let b1 = U::from_f64(0.9f64).expect("Error in type conversion from f64.");
        let b2 = U::from_f64(0.999f64).expect("Error in type conversion from f64.");
        let device: D = device.clone();

        AdamBuilder { lr, weight_decay, b1, b2, device }
    }

    /// Returns the builder with the learning rate replaced by `lr`.
    pub fn lr(self,lr:U) -> AdamBuilder<U,D> {
        AdamBuilder { lr, ..self }
    }

    /// Returns the builder with the weight-decay coefficient replaced by `weight_decay`.
    pub fn weight_decay(self,weight_decay:U) -> AdamBuilder<U,D> {
        AdamBuilder { weight_decay, ..self }
    }

    /// Returns the builder with the first-moment decay rate replaced by `b1`.
    pub fn b1(self,b1:U) -> AdamBuilder<U,D> {
        AdamBuilder { b1, ..self }
    }

    /// Returns the builder with the second-moment decay rate replaced by `b2`.
    pub fn b2(self,b2:U) -> AdamBuilder<U,D> {
        AdamBuilder { b2, ..self }
    }
}
impl<U> OptimizerBuilder<U,DeviceCpu<U>> for AdamBuilder<U,DeviceCpu<U>>
    where U: UnitValue<U>, Adam<U,DeviceCpu<U>>: Optimizer<U,DeviceCpu<U>> {
    type Output = Adam<U,DeviceCpu<U>>;

    /// Builds a CPU [`Adam`] optimizer for `size` elements from the stored
    /// hyper-parameters. Never fails.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let optimizer = Adam::<U,DeviceCpu<U>>::with_params(
            &self.device, size, self.lr, self.weight_decay, self.b1, self.b2,
        );
        Ok(optimizer)
    }
}
impl<U> OptimizerBuilder<U,DeviceGpu<U>> for AdamBuilder<U,DeviceGpu<U>>
    where U: UnitValue<U>, DeviceGpu<U>: Device<U>,
          Adam<U,DeviceGpu<U>>: Optimizer<U,DeviceGpu<U>> {
    type Output = Adam<U,DeviceGpu<U>>;

    /// Builds a GPU [`Adam`] optimizer for `size` elements from the stored
    /// hyper-parameters.
    ///
    /// # Errors
    /// Returns whatever error the optimizer constructor reports.
    fn build(&self,size:usize) -> Result<Self::Output,OptimizerBuildError> {
        let device = &self.device;
        Adam::<U,DeviceGpu<U>>::with_params(
            device, size, self.lr, self.weight_decay, self.b1, self.b2,
        )
    }
}