#![allow(clippy::assign_op_pattern)]
#![allow(clippy::expect_used)]
use std::{cmp::Ordering, fs, fs::File, io::prelude::*};
use rand::prelude::*;
use rand_distr::Normal;
use rayon::prelude::*;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
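/// The scalar type used throughout the crate: `f32` by default, `f64` when
/// the `floats-f64` feature is enabled.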
#[cfg(not(feature = "floats-f64"))]
pub type Float = f32;
#[cfg(feature = "floats-f64")]
pub type Float = f64;
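/// A problem definition for the evolution strategy. Implementors score a
/// parameter vector; `eval_train` is called during optimization (with the
/// current loop index, e.g. to select a minibatch) and `eval_test` for the
/// final report. The optimizers ascend the score, so higher must mean better;
/// negate a loss to minimize it.
///
/// A minimal sketch for a toy quadratic whose maximum is at the origin
/// (`Quadratic` is a hypothetical type, not part of this crate):
///
/// ```ignore
/// struct Quadratic;
/// impl Evaluator for Quadratic {
///     fn eval_test(&self, parameters: &[Float]) -> Float {
///         -parameters.iter().map(|p| p * p).sum::<Float>()
///     }
///     fn eval_train(&self, parameters: &[Float], _loop_index: usize) -> Float {
///         self.eval_test(parameters)
///     }
/// }
/// ```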
pub trait Evaluator {
fn eval_test(&self, parameters: &[Float]) -> Float;
fn eval_train(&self, parameters: &[Float], loop_index: usize) -> Float;
}
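/// An update rule that maps the current parameters and a gradient estimate to
/// a delta which the caller adds to the parameters. `get_delta` also advances
/// the internal step counter reported by `get_t`.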
pub trait Optimizer {
fn get_delta(&mut self, parameters: &[Float], gradient: &[Float]) -> Vec<Float>;
fn get_t(&self) -> usize;
}
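/// Stochastic gradient ascent with momentum and weight decay.
/// Per step: `m ← β·m + (1 − β)·g` and `Δ ← lr·m − lr·λ·θ`.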
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SGD {
lr: Float,
lambda: Float,
beta: Float,
lastv: Vec<Float>,
t: usize,
}
impl Default for SGD {
fn default() -> Self {
Self { lr: 0.01, lambda: 0.0, beta: 0.0, lastv: vec![0.0], t: 0 }
}
}
impl SGD {
pub fn set_lr(&mut self, learning_rate: Float) -> &mut Self {
if learning_rate <= 0.0 {
panic!("Learning rate must be greater than zero!");
}
self.lr = learning_rate;
self
}
pub fn set_lambda(&mut self, coeff: Float) -> &mut Self {
if coeff < 0.0 {
panic!("Lambda coefficient may not be smaller than zero!");
}
self.lambda = coeff;
self
}
pub fn set_beta(&mut self, factor: Float) -> &mut Self {
if !(0.0..1.0).contains(&factor) {
panic!("Prohibited momentum paramter: {}. Must be in [0.0, 1.0)!", factor);
}
self.beta = factor;
self
}
#[must_use]
pub fn to_json(&self) -> String {
serde_json::to_string(self).expect("Encoding JSON failed!")
}
#[must_use]
pub fn from_json(encoded: &str) -> SGD {
serde_json::from_str(encoded).expect("Decoding JSON failed!")
}
pub fn save(&self, file: &str) -> Result<(), std::io::Error> {
let mut file = File::create(file)?;
let json = self.to_json();
file.write_all(json.as_bytes())?;
Ok(())
}
pub fn load(file: &str) -> Result<SGD, std::io::Error> {
let json = fs::read_to_string(file)?;
Ok(SGD::from_json(&json))
}
}
impl Optimizer for SGD {
fn get_delta(&mut self, params: &[Float], grad: &[Float]) -> Vec<Float> {
if self.lastv.len() != params.len() {
self.lastv = vec![0.0; params.len()];
}
let mut delta = grad.to_vec();
for ((m, d), p) in self.lastv.iter_mut().zip(delta.iter_mut()).zip(params.iter()) {
*m = self.beta.mul_add(*m, (1.0 - self.beta) * *d);
            *d = self.lr * *m;
            *d -= self.lr * self.lambda * *p;
}
self.t += 1;
delta
}
fn get_t(&self) -> usize {
self.t
}
}
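/// Adam with a bias-corrected step size and optional AdaBound-style clipping,
/// which bounds the effective per-weight learning rate to
/// `[final_lr·(1 − 1/(γ·t + 1)), final_lr·(1 + 1/(γ·t))]`.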
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Adam {
lr: Float,
lambda: Float,
beta1: Float,
beta2: Float,
eps: Float,
t: usize,
avggrad1: Vec<Float>,
avggrad2: Vec<Float>,
adabound: bool,
final_lr: Float,
gamma: Float,
}
impl Default for Adam {
fn default() -> Self {
Self {
lr: 0.001,
lambda: 0.0,
beta1: 0.9,
beta2: 0.999,
eps: 1e-8,
t: 0,
avggrad1: vec![0.0],
avggrad2: vec![0.0],
adabound: false,
final_lr: 0.1,
gamma: 0.001,
}
}
}
impl Adam {
pub fn set_lr(&mut self, learning_rate: Float) -> &mut Self {
if learning_rate <= 0.0 {
panic!("Learning rate must be greater than zero!");
}
self.lr = learning_rate;
self
}
pub fn set_final_lr(&mut self, learning_rate: Float) -> &mut Self {
if learning_rate <= 0.0 {
panic!("Learning rate must be greater than zero!");
}
self.final_lr = learning_rate;
self
}
pub fn set_lambda(&mut self, coeff: Float) -> &mut Self {
if coeff < 0.0 {
panic!("Lambda coefficient may not be smaller than zero!");
}
self.lambda = coeff;
self
}
pub fn set_gamma(&mut self, coeff: Float) -> &mut Self {
if !(0.0..1.0).contains(&coeff) {
panic!("Gamma coefficient is in appropriate!");
}
self.gamma = coeff;
self
}
pub fn set_beta1(&mut self, beta: Float) -> &mut Self {
if !(0.0..1.0).contains(&beta) {
panic!("Prohibited beta coefficient: {}. Must be in [0.0, 1.0)!", beta);
}
self.beta1 = beta;
self
}
pub fn set_beta2(&mut self, beta: Float) -> &mut Self {
if !(0.0..1.0).contains(&beta) {
panic!("Prohibited beta coefficient: {}. Must be in [0.0, 1.0)!", beta);
}
self.beta2 = beta;
self
}
pub fn set_eps(&mut self, epsilon: Float) -> &mut Self {
if epsilon < 0.0 {
panic!("Epsilon must be >= 0!");
}
self.eps = epsilon;
self
}
pub fn set_adabound(&mut self, use_bound: bool) -> &mut Self {
self.adabound = use_bound;
self
}
#[must_use]
pub fn to_json(&self) -> String {
serde_json::to_string(self).expect("Encoding JSON failed!")
}
#[must_use]
pub fn from_json(encoded: &str) -> Adam {
serde_json::from_str(encoded).expect("Decoding JSON failed!")
}
pub fn save(&self, file: &str) -> Result<(), std::io::Error> {
let mut file = File::create(file)?;
let json = self.to_json();
file.write_all(json.as_bytes())?;
Ok(())
}
pub fn load(file: &str) -> Result<Adam, std::io::Error> {
let json = fs::read_to_string(file)?;
Ok(Adam::from_json(&json))
}
}
impl Optimizer for Adam {
fn get_delta(&mut self, params: &[Float], grad: &[Float]) -> Vec<Float> {
if self.avggrad1.len() != params.len() || self.avggrad2.len() != params.len() {
self.avggrad1 = vec![0.0; params.len()];
self.avggrad2 = vec![0.0; params.len()];
}
self.t += 1;
let lr_unbias = self.lr * (1.0 - self.beta2.powf(self.t as Float)).sqrt()
/ (1.0 - self.beta1.powf(self.t as Float));
let lower_bound = (1.0 - 1.0 / (self.gamma.mul_add(self.t as Float, 1.0))) * self.final_lr;
let upper_bound = (1.0 + 1.0 / (self.gamma * self.t as Float)) * self.final_lr;
let mut delta = grad.to_vec();
for (((g1, g2), d), p) in self
.avggrad1
.iter_mut()
.zip(self.avggrad2.iter_mut())
.zip(delta.iter_mut())
.zip(params.iter())
{
*g1 = self.beta1.mul_add(*g1, (1.0 - self.beta1) * *d);
*g2 = self.beta2.mul_add(*g2, (1.0 - self.beta2) * *d * *d);
if self.adabound {
let bound_lr =
(lr_unbias / (g2.sqrt() + self.eps)).max(lower_bound).min(upper_bound);
*d = bound_lr * *g1;
            } else {
                *d = lr_unbias * *g1 / (g2.sqrt() + self.eps);
            }
*d -= self.lr * self.lambda * *p;
}
delta
}
fn get_t(&self) -> usize {
self.t
}
}
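/// Rectified Adam: while the variance estimate is unreliable (the
/// approximated simple-moving-average length `sma_t` is at most 4) the update
/// falls back to a plain bias-corrected momentum step; afterwards the Adam
/// step is scaled by the rectification factor `r_t`.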
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RAdam {
lr: Float,
lambda: Float,
beta1: Float,
beta2: Float,
eps: Float,
t: usize,
avggrad1: Vec<Float>,
avggrad2: Vec<Float>,
}
impl Default for RAdam {
fn default() -> Self {
RAdam {
lr: 0.001,
lambda: 0.0,
beta1: 0.9,
beta2: 0.999,
eps: 1e-8,
t: 0,
avggrad1: vec![0.0],
avggrad2: vec![0.0],
}
}
}
impl RAdam {
pub fn set_lr(&mut self, learning_rate: Float) -> &mut Self {
if learning_rate <= 0.0 {
panic!("Learning rate must be greater than zero!");
}
self.lr = learning_rate;
self
}
pub fn set_lambda(&mut self, coeff: Float) -> &mut Self {
if coeff < 0.0 {
panic!("Lambda coefficient may not be smaller than zero!");
}
self.lambda = coeff;
self
}
pub fn set_beta1(&mut self, beta: Float) -> &mut Self {
if !(0.0..1.0).contains(&beta) {
panic!("Prohibited beta coefficient: {}. Must be in [0.0, 1.0)!", beta);
}
self.beta1 = beta;
self
}
pub fn set_beta2(&mut self, beta: Float) -> &mut Self {
if !(0.0..1.0).contains(&beta) {
panic!("Prohibited beta coefficient: {}. Must be in [0.0, 1.0)!", beta);
}
self.beta2 = beta;
self
}
pub fn set_eps(&mut self, epsilon: Float) -> &mut Self {
if epsilon < 0.0 {
panic!("Epsilon must be >= 0!");
}
self.eps = epsilon;
self
}
#[must_use]
pub fn to_json(&self) -> String {
serde_json::to_string(self).expect("Encoding JSON failed!")
}
#[must_use]
pub fn from_json(encoded: &str) -> RAdam {
serde_json::from_str(encoded).expect("Decoding JSON failed!")
}
pub fn save(&self, file: &str) -> Result<(), std::io::Error> {
let mut file = File::create(file)?;
let json = self.to_json();
file.write_all(json.as_bytes())?;
Ok(())
}
pub fn load(file: &str) -> Result<RAdam, std::io::Error> {
let json = fs::read_to_string(file)?;
Ok(RAdam::from_json(&json))
}
}
impl Optimizer for RAdam {
fn get_delta(&mut self, params: &[Float], grad: &[Float]) -> Vec<Float> {
if self.avggrad1.len() != params.len() || self.avggrad2.len() != params.len() {
self.avggrad1 = vec![0.0; params.len()];
self.avggrad2 = vec![0.0; params.len()];
}
self.t += 1;
let t_float = self.t as Float;
let beta1_pt = self.beta1.powf(t_float);
let beta2_pt = self.beta2.powf(t_float);
let sma_inf = 2.0 / (1.0 - self.beta2) - 1.0;
let sma_t = sma_inf - 2.0 * t_float * beta2_pt / (1.0 - beta2_pt);
let r_t = (((sma_t - 4.0) * (sma_t - 2.0) * sma_inf)
/ ((sma_inf - 4.0) * (sma_inf - 2.0) * sma_t))
        .sqrt();
        let lr_unbias1 = self.lr / (1.0 - beta1_pt);
let lr_unbias12 = self.lr * (1.0 - beta2_pt).sqrt() / (1.0 - beta1_pt);
let mut delta = grad.to_vec();
for (((g1, g2), d), p) in self
.avggrad1
.iter_mut()
.zip(self.avggrad2.iter_mut())
.zip(delta.iter_mut())
.zip(params.iter())
{
*g1 = self.beta1.mul_add(*g1, (1.0 - self.beta1) * *d);
*g2 = self.beta2.mul_add(*g2, (1.0 - self.beta2) * *d * *d);
if sma_t > 4.0 {
                *d = lr_unbias12 * r_t * *g1 / (g2.sqrt() + self.eps);
            } else {
                *d = lr_unbias1 * *g1;
            }
*d -= self.lr * self.lambda * *p;
}
delta
}
fn get_t(&self) -> usize {
self.t
}
}
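/// Adamax: Adam with the second raw moment replaced by an exponentially
/// decayed infinity norm, `u ← max(β2·u, |g|)`, so only the first moment
/// needs bias correction.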
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Adamax {
lr: Float,
lambda: Float,
beta1: Float,
beta2: Float,
eps: Float,
t: usize,
avggrad1: Vec<Float>,
avggrad2: Vec<Float>,
}
impl Default for Adamax {
fn default() -> Self {
Self {
lr: 0.002,
lambda: 0.0,
beta1: 0.9,
beta2: 0.999,
eps: 0.0,
t: 0,
avggrad1: vec![0.0],
avggrad2: vec![0.0],
}
}
}
impl Adamax {
pub fn set_lr(&mut self, learning_rate: Float) -> &mut Self {
if learning_rate <= 0.0 {
panic!("Learning rate must be greater than zero!");
}
self.lr = learning_rate;
self
}
pub fn set_lambda(&mut self, coeff: Float) -> &mut Self {
if coeff < 0.0 {
panic!("Lambda coefficient may not be smaller than zero!");
}
self.lambda = coeff;
self
}
pub fn set_beta1(&mut self, beta: Float) -> &mut Self {
if !(0.0..1.0).contains(&beta) {
panic!("Prohibited beta coefficient: {}. Must be in [0.0, 1.0)!", beta);
}
self.beta1 = beta;
self
}
pub fn set_beta2(&mut self, beta: Float) -> &mut Self {
if !(0.0..1.0).contains(&beta) {
panic!("Prohibited beta coefficient: {}. Must be in [0.0, 1.0)!", beta);
}
self.beta2 = beta;
self
}
pub fn set_eps(&mut self, epsilon: Float) -> &mut Self {
if epsilon < 0.0 {
panic!("Epsilon must be >= 0!");
}
self.eps = epsilon;
self
}
#[must_use]
pub fn to_json(&self) -> String {
serde_json::to_string(self).expect("Encoding JSON failed!")
}
#[must_use]
pub fn from_json(encoded: &str) -> Adamax {
serde_json::from_str(encoded).expect("Decoding JSON failed!")
}
pub fn save(&self, file: &str) -> Result<(), std::io::Error> {
let mut file = File::create(file)?;
let json = self.to_json();
file.write_all(json.as_bytes())?;
Ok(())
}
pub fn load(file: &str) -> Result<Adamax, std::io::Error> {
let json = fs::read_to_string(file)?;
Ok(Adamax::from_json(&json))
}
}
impl Optimizer for Adamax {
fn get_delta(&mut self, params: &[Float], grad: &[Float]) -> Vec<Float> {
if self.avggrad1.len() != params.len() || self.avggrad2.len() != params.len() {
self.avggrad1 = vec![0.0; params.len()];
self.avggrad2 = vec![0.0; params.len()];
}
self.t += 1;
let lr_unbias = self.lr / (1.0 - self.beta1.powf(self.t as Float));
let mut delta = grad.to_vec();
for (((g1, g2), d), p) in self
.avggrad1
.iter_mut()
.zip(self.avggrad2.iter_mut())
.zip(delta.iter_mut())
.zip(params.iter())
{
*g1 = self.beta1.mul_add(*g1, (1.0 - self.beta1) * *d);
*g2 = (self.beta2 * *g2).max(d.abs());
            *d = lr_unbias * *g1 / (*g2 + self.eps);
            *d -= self.lr * self.lambda * *p;
}
delta
}
fn get_t(&self) -> usize {
self.t
}
}
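/// Lookahead wrapper: the inner optimizer takes `k` fast steps, then the
/// saved slow weights are pulled a fraction `alpha` toward the fast weights
/// and the fast weights restart from that point. The returned delta is
/// adjusted so that the caller's usual `params += delta` lands on the
/// interpolated weights.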
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Lookahead<Opt: Optimizer> {
subopt: Opt,
alpha: Float,
t: usize,
k: usize,
paramssave: Vec<Float>,
}
impl<Opt: Optimizer> Lookahead<Opt> {
#[must_use]
pub fn new(opt: Opt) -> Lookahead<Opt> {
Lookahead { subopt: opt, alpha: 0.5, t: 0, k: 5, paramssave: Vec::new() }
}
pub fn set_alpha(&mut self, step: Float) -> &mut Self {
if step <= 0.0 {
panic!("Step size must be greater than zero!");
}
self.alpha = step;
self
}
pub fn set_k(&mut self, syncfreq: usize) -> &mut Self {
if syncfreq < 1 {
panic!("Synchronization frequency in Lookahead must be at least k=1");
}
self.k = syncfreq;
self
}
pub fn get_opt(&self) -> &Opt {
&self.subopt
}
pub fn get_opt_mut(&mut self) -> &mut Opt {
&mut self.subopt
}
}
impl<Opt: Optimizer + Serialize + DeserializeOwned> Lookahead<Opt> {
#[must_use]
pub fn to_json(&self) -> String {
serde_json::to_string(self).expect("Encoding JSON failed!")
}
#[must_use]
pub fn from_json(encoded: &str) -> Lookahead<Opt> {
serde_json::from_str(encoded).expect("Decoding JSON failed!")
}
pub fn save(&self, file: &str) -> Result<(), std::io::Error> {
let mut file = File::create(file)?;
let json = self.to_json();
file.write_all(json.as_bytes())?;
Ok(())
}
pub fn load(file: &str) -> Result<Lookahead<Opt>, std::io::Error> {
let json = fs::read_to_string(file)?;
Ok(Lookahead::<Opt>::from_json(&json))
}
}
impl<Opt: Optimizer> Optimizer for Lookahead<Opt> {
fn get_delta(&mut self, params: &[Float], grad: &[Float]) -> Vec<Float> {
if self.t == 0 {
self.paramssave = params.to_vec();
}
let mut delta = self.subopt.get_delta(params, grad);
self.t += 1;
if self.t % self.k == 0 {
for ((ps, p), d) in self.paramssave.iter_mut().zip(params.iter()).zip(delta.iter_mut())
{
                let diff = (*p + *d) - *ps;
                let new = self.alpha.mul_add(diff, *ps);
                *d = new - *p;
                *ps = new;
            }
}
delta
}
fn get_t(&self) -> usize {
self.t
}
}
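/// Evolution-strategy driver: holds the parameter vector, an [`Optimizer`]
/// and an [`Evaluator`], and estimates gradients from antithetic Gaussian
/// perturbations of the parameters.
///
/// A usage sketch, assuming the hypothetical `Quadratic` evaluator from the
/// [`Evaluator`] docs:
///
/// ```ignore
/// let mut es = ES::new_with_adam(Quadratic, 0.001, 0.0);
/// es.set_params(vec![1.0; 10]).set_std(0.02).set_samples(200);
/// let (test_score, grad_norm) = es.optimize(100);
/// ```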
#[derive(Debug)]
pub struct ES<Feval: Evaluator, Opt: Optimizer> {
dim: usize,
params: Vec<Float>,
opt: Opt,
eval: Feval,
std: Float,
samples: usize,
}
impl<Feval: Evaluator> ES<Feval, SGD> {
pub fn new_with_sgd(
evaluator: Feval,
learning_rate: Float,
beta: Float,
lambda: Float,
) -> ES<Feval, SGD> {
let mut optimizer = SGD::default();
optimizer.set_lr(learning_rate).set_beta(beta).set_lambda(lambda);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, Lookahead<SGD>> {
pub fn new_with_lookahead_sgd(
evaluator: Feval,
k: usize,
learning_rate: Float,
beta: Float,
lambda: Float,
) -> ES<Feval, Lookahead<SGD>> {
let mut optimizer = SGD::default();
optimizer.set_lr(learning_rate).set_beta(beta).set_lambda(lambda);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, Adam> {
pub fn new_with_adam(evaluator: Feval, learning_rate: Float, lambda: Float) -> ES<Feval, Adam> {
let mut optimizer = Adam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn new_with_adam_ex(
evaluator: Feval,
learning_rate: Float,
lambda: Float,
beta1: Float,
beta2: Float,
adabound: bool,
final_lr: Float,
) -> ES<Feval, Adam> {
let mut optimizer = Adam::default();
optimizer
.set_lr(learning_rate)
.set_lambda(lambda)
.set_beta1(beta1)
.set_beta2(beta2)
.set_adabound(adabound)
.set_final_lr(final_lr);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, Lookahead<Adam>> {
pub fn new_with_lookahead_adam(
evaluator: Feval,
k: usize,
learning_rate: Float,
lambda: Float,
) -> ES<Feval, Lookahead<Adam>> {
let mut optimizer = Adam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn new_with_lookahead_adam_ex(
evaluator: Feval,
alpha: Float,
k: usize,
learning_rate: Float,
lambda: Float,
beta1: Float,
beta2: Float,
) -> ES<Feval, Lookahead<Adam>> {
let mut optimizer = Adam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda).set_beta1(beta1).set_beta2(beta2);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_alpha(alpha).set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, RAdam> {
pub fn new_with_radam(
evaluator: Feval,
learning_rate: Float,
lambda: Float,
) -> ES<Feval, RAdam> {
let mut optimizer = RAdam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn new_with_radam_ex(
evaluator: Feval,
learning_rate: Float,
lambda: Float,
beta1: Float,
beta2: Float,
) -> ES<Feval, RAdam> {
let mut optimizer = RAdam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda).set_beta1(beta1).set_beta2(beta2);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, Lookahead<RAdam>> {
pub fn new_with_lookahead_radam(
evaluator: Feval,
k: usize,
learning_rate: Float,
lambda: Float,
) -> ES<Feval, Lookahead<RAdam>> {
let mut optimizer = RAdam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn new_with_lookahead_radam_ex(
evaluator: Feval,
alpha: Float,
k: usize,
learning_rate: Float,
lambda: Float,
beta1: Float,
beta2: Float,
) -> ES<Feval, Lookahead<RAdam>> {
let mut optimizer = RAdam::default();
optimizer.set_lr(learning_rate).set_lambda(lambda).set_beta1(beta1).set_beta2(beta2);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_alpha(alpha).set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, Adamax> {
pub fn new_with_adamax(
evaluator: Feval,
learning_rate: Float,
lambda: Float,
) -> ES<Feval, Adamax> {
let mut optimizer = Adamax::default();
optimizer.set_lr(learning_rate).set_lambda(lambda);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn new_with_adamax_ex(
evaluator: Feval,
learning_rate: Float,
lambda: Float,
beta1: Float,
beta2: Float,
eps: Float,
) -> ES<Feval, Adamax> {
let mut optimizer = Adamax::default();
optimizer
.set_lr(learning_rate)
.set_lambda(lambda)
.set_beta1(beta1)
.set_beta2(beta2)
.set_eps(eps);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator> ES<Feval, Lookahead<Adamax>> {
pub fn new_with_lookahead_adamax(
evaluator: Feval,
k: usize,
learning_rate: Float,
lambda: Float,
) -> ES<Feval, Lookahead<Adamax>> {
let mut optimizer = Adamax::default();
optimizer.set_lr(learning_rate).set_lambda(lambda);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn new_with_lookahead_adamax_ex(
evaluator: Feval,
alpha: Float,
k: usize,
learning_rate: Float,
lambda: Float,
beta1: Float,
beta2: Float,
) -> ES<Feval, Lookahead<Adamax>> {
let mut optimizer = Adamax::default();
optimizer.set_lr(learning_rate).set_lambda(lambda).set_beta1(beta1).set_beta2(beta2);
let mut optimizer = Lookahead::new(optimizer);
optimizer.set_alpha(alpha).set_k(k);
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
}
impl<Feval: Evaluator, Opt: Optimizer> ES<Feval, Opt> {
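    /// Builds an ES around an arbitrary optimizer; the typed constructors
    /// above are conveniences for the optimizers provided by this crate.
    /// Defaults: a single parameter at 0.0, noise std 0.02, 500 samples per
    /// gradient estimate.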
pub fn new(optimizer: Opt, evaluator: Feval) -> ES<Feval, Opt> {
ES { dim: 1, params: vec![0.0], opt: optimizer, eval: evaluator, std: 0.02, samples: 500 }
}
pub fn set_params(&mut self, params: Vec<Float>) -> &mut Self {
self.params = params;
self.dim = self.params.len();
self
}
pub fn set_opt(&mut self, optimizer: Opt) -> &mut Self {
self.opt = optimizer;
self
}
pub fn set_eval(&mut self, evaluator: Feval) -> &mut Self {
self.eval = evaluator;
self
}
pub fn set_std(&mut self, noise: Float) -> &mut Self {
if noise <= 0.0 {
panic!("Noise std may not be <= 0!");
}
self.std = noise;
self
}
pub fn set_samples(&mut self, num: usize) -> &mut Self {
if num == 0 {
panic!("Number of samples cannot be zero!");
}
self.samples = num;
self
}
pub fn get_params(&self) -> &Vec<Float> {
&self.params
}
pub fn get_opt(&self) -> &Opt {
&self.opt
}
pub fn get_eval(&self) -> &Feval {
&self.eval
}
pub fn get_params_mut(&mut self) -> &mut Vec<Float> {
&mut self.params
}
pub fn get_opt_mut(&mut self) -> &mut Opt {
&mut self.opt
}
pub fn get_eval_mut(&mut self) -> &mut Feval {
&mut self.eval
}
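    /// Runs `n` iterations of the plain ES update: for each of the `N`
    /// samples an antithetic pair `θ ± εᵢ` with `εᵢ ~ N(0, σ²·I)` is scored,
    /// the gradient is estimated as
    /// `g ≈ 1/(2·N·σ) · Σᵢ εᵢ·(f(θ + εᵢ) − f(θ − εᵢ))`,
    /// and the optimizer's delta is added to the parameters. Returns the test
    /// score and the norm of the last gradient estimate.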
pub fn optimize(&mut self, n: usize) -> (Float, Float) {
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
grad = vec![0.0; self.dim];
for i in 0..self.samples {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
let eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
let mut testparampos = eps.clone();
let mut testparamneg = eps.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
let scorepos = self.eval.eval_train(&testparampos, t + iterations);
let scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
for (g, e) in grad.iter_mut().zip(eps.iter()) {
*g += *e * (scorepos - scoreneg);
}
}
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
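    /// Like [`optimize`](ES::optimize), but with rank-based fitness shaping:
    /// all `2·N` training scores are sorted and each perturbation is weighted
    /// by its centered rank in `[-1, 1]` instead of by the raw score
    /// difference, making the update invariant to monotone transformations of
    /// the score.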
pub fn optimize_ranked(&mut self, n: usize) -> (Float, Float) {
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
grad = vec![0.0; self.dim];
let mut scores = Vec::new();
for i in 0..self.samples {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
                let mut testparampos = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
                let mut testparamneg = testparampos.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
let scorepos = self.eval.eval_train(&testparampos, t + iterations);
let scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
scores.push((i, false, scorepos));
scores.push((i, true, scoreneg));
}
sort_scores(&mut scores);
scores.iter().enumerate().for_each(|(rank, (i, neg, _score))| {
let mut rng = SmallRng::seed_from_u64(seed + *i as u64);
let eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
let negfactor = if *neg { -1.0 } else { 1.0 };
let centered_rank = rank as Float / (self.samples as Float - 0.5) - 1.0;
for (g, e) in grad.iter_mut().zip(eps.iter()) {
*g += *e * negfactor * centered_rank;
}
});
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
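    /// Like [`optimize`](ES::optimize), but each score difference is divided
    /// by the standard deviation of the batch's `2·N` training scores, which
    /// normalizes the gradient scale across problems.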
pub fn optimize_std(&mut self, n: usize) -> (Float, Float) {
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
grad = vec![0.0; self.dim];
let mut scores = vec![(0.0, 0.0); self.samples];
scores.iter_mut().enumerate().for_each(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
                let mut testparampos = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
                let mut testparamneg = testparampos.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
*scorepos = self.eval.eval_train(&testparampos, t + iterations);
*scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
});
let (_mean, std) = get_mean_std(&scores);
scores.iter().enumerate().for_each(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
let eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
for (g, e) in grad.iter_mut().zip(eps.iter()) {
*g += *e * (*scorepos - *scoreneg) / std;
}
});
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
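    /// Like [`optimize`](ES::optimize), but each score difference is divided
    /// by the largest absolute training score in the batch.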
pub fn optimize_norm(&mut self, n: usize) -> (Float, Float) {
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
grad = vec![0.0; self.dim];
let mut scores = vec![(0.0, 0.0); self.samples];
let mut maximum = -1.0;
scores.iter_mut().enumerate().for_each(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
                let mut testparampos = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
                let mut testparamneg = testparampos.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
*scorepos = self.eval.eval_train(&testparampos, t + iterations);
*scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
if scorepos.abs() > maximum {
maximum = scorepos.abs();
}
if scoreneg.abs() > maximum {
maximum = scoreneg.abs();
}
});
scores.iter().enumerate().for_each(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
let eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
for (g, e) in grad.iter_mut().zip(eps.iter()) {
*g += *e * (*scorepos - *scoreneg) / maximum;
}
});
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
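    /// Parallel version of [`optimize`](ES::optimize): samples are evaluated
    /// on the rayon thread pool and their contributions summed with a
    /// parallel reduce. Each sample reseeds a `SmallRng` from `seed + i`, so
    /// its perturbation is reproducible on any thread (the floating-point
    /// summation order may still differ from the serial version).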
pub fn optimize_par(&mut self, n: usize) -> (Float, Float)
where
Opt: Sync,
Feval: Sync,
{
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
grad = (0..self.samples)
.into_par_iter()
.map(|i| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
let mut eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
let mut testparampos = eps.clone();
let mut testparamneg = eps.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
let scorepos = self.eval.eval_train(&testparampos, t + iterations);
let scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
mul_scalar(&mut eps, scorepos - scoreneg);
eps
})
.reduce(
|| vec![0.0; self.dim],
|mut a, b| {
add_inplace(&mut a, &b);
a
},
);
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
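    /// Parallel version of [`optimize_ranked`](ES::optimize_ranked); each
    /// sample's perturbation is regenerated from its seed after the scores
    /// are sorted.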
pub fn optimize_ranked_par(&mut self, n: usize) -> (Float, Float)
where
Opt: Sync,
Feval: Sync,
{
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
let mut scores = vec![(0, false, 0.0); 2 * self.samples];
for i in 0..self.samples {
scores[2 * i].0 = i;
scores[2 * i + 1].0 = i;
scores[2 * i + 1].1 = true;
}
scores.par_iter_mut().for_each(|(i, neg, score)| {
let mut rng = SmallRng::seed_from_u64(seed + *i as u64);
                let mut testparam = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
                if *neg {
mul_scalar(&mut testparam, -1.0);
}
add_inplace(&mut testparam, &self.params);
*score = self.eval.eval_train(&testparam, t + iterations);
});
sort_scores(&mut scores);
grad = scores
.par_iter()
.enumerate()
.map(|(rank, (i, neg, _score))| {
let mut rng = SmallRng::seed_from_u64(seed + *i as u64);
let mut eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
let negfactor = if *neg { -1.0 } else { 1.0 };
let centered_rank = rank as Float / (self.samples as Float - 0.5) - 1.0;
mul_scalar(&mut eps, negfactor * centered_rank);
eps
})
.reduce(
|| vec![0.0; self.dim],
|mut a, b| {
add_inplace(&mut a, &b);
a
},
);
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
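    /// Parallel version of [`optimize_std`](ES::optimize_std).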
pub fn optimize_std_par(&mut self, n: usize) -> (Float, Float)
where
Opt: Sync,
Feval: Sync,
{
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
let mut scores = vec![(0.0, 0.0); self.samples];
scores.par_iter_mut().enumerate().for_each(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
                let mut testparampos = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
                let mut testparamneg = testparampos.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
*scorepos = self.eval.eval_train(&testparampos, t + iterations);
*scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
});
let (_mean, std) = get_mean_std(&scores);
grad = scores
.par_iter()
.enumerate()
.map(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
let mut eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
mul_scalar(&mut eps, (*scorepos - *scoreneg) / std);
eps
})
.reduce(
|| vec![0.0; self.dim],
|mut a, b| {
add_inplace(&mut a, &b);
a
},
);
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
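    /// Parallel version of [`optimize_norm`](ES::optimize_norm).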
pub fn optimize_norm_par(&mut self, n: usize) -> (Float, Float)
where
Opt: Sync,
Feval: Sync,
{
let mut rng = thread_rng();
let mut grad = vec![0.0; self.dim];
let t = self.opt.get_t();
for iterations in 0..n {
let seed = rng.gen::<u64>() % (std::u64::MAX - self.samples as u64);
let mut scores = vec![(0.0, 0.0); self.samples];
scores.par_iter_mut().enumerate().for_each(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
                let mut testparampos = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
                let mut testparamneg = testparampos.clone();
for ((pos, neg), p) in
testparampos.iter_mut().zip(testparamneg.iter_mut()).zip(self.params.iter())
{
*pos = *p + *pos;
*neg = *p - *neg;
}
*scorepos = self.eval.eval_train(&testparampos, t + iterations);
*scoreneg = self.eval.eval_train(&testparamneg, t + iterations);
});
let mut maximum = -1.0;
scores.iter().for_each(|x| {
if x.0.abs() > maximum {
maximum = x.0.abs();
}
if x.1.abs() > maximum {
maximum = x.1.abs();
}
});
grad = scores
.par_iter()
.enumerate()
.map(|(i, (scorepos, scoreneg))| {
let mut rng = SmallRng::seed_from_u64(seed + i as u64);
let mut eps = gen_rnd_vec_rng(&mut rng, self.dim, self.std);
mul_scalar(&mut eps, (*scorepos - *scoreneg) / maximum);
eps
})
.reduce(
|| vec![0.0; self.dim],
|mut a, b| {
add_inplace(&mut a, &b);
a
},
);
mul_scalar(&mut grad, 1.0 / ((2 * self.samples) as Float * self.std));
let delta = self.opt.get_delta(&self.params, &grad);
add_inplace(&mut self.params, &delta);
}
(self.eval.eval_test(&self.params), norm(&grad))
}
}
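/// Samples an `n`-dimensional Gaussian perturbation with standard deviation
/// `std` from the supplied RNG.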
fn gen_rnd_vec_rng<RNG: Rng>(rng: &mut RNG, n: usize, std: Float) -> Vec<Float> {
let normal =
Normal::new(0.0, f64::from(std)).expect("Invalid parameters for Normal distribution!");
normal.sample_iter(rng).take(n).map(|x| x as Float).collect()
}
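/// Samples an `n`-dimensional Gaussian vector with standard deviation `std`
/// from the thread-local RNG, e.g. for initializing parameters.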
#[must_use]
pub fn gen_rnd_vec(n: usize, std: Float) -> Vec<Float> {
let mut rng = thread_rng();
let normal =
Normal::new(0.0, f64::from(std)).expect("Invalid parameters for Normal distribution!");
normal.sample_iter(&mut rng).take(n).map(|x| x as Float).collect()
}
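/// Element-wise in-place addition: `v1[i] += v2[i]`.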
fn add_inplace(v1: &mut [Float], v2: &[Float]) {
for (val1, val2) in v1.iter_mut().zip(v2.iter()) {
*val1 += *val2;
}
}
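/// Multiplies every element in place by `scalar`.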
fn mul_scalar(vec: &mut [Float], scalar: Float) {
for val in vec.iter_mut() {
*val *= scalar;
}
}
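/// Euclidean (L2) norm of `vec`.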
#[must_use]
fn norm(vec: &[Float]) -> Float {
let mut norm = 0.0;
for val in vec.iter() {
norm += *val * *val;
}
norm.sqrt()
}
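/// Mean and population standard deviation over all scores in the flattened
/// pairs.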
#[must_use]
fn get_mean_std(vec: &[(Float, Float)]) -> (Float, Float) {
let mut mean = 0.0;
vec.iter().for_each(|(scorepos, scoreneg)| {
mean += *scorepos + *scoreneg;
});
mean /= (2 * vec.len()) as Float;
let mut std = 0.0;
vec.iter().for_each(|(scorepos, scoreneg)| {
let mut diff = *scorepos - mean;
std += diff * diff;
diff = *scoreneg - mean;
std += diff * diff;
});
std /= (2 * vec.len()) as Float;
std = std.sqrt();
(mean, std)
}
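/// Sorts scores ascending by the third tuple field; NaNs are ordered first so
/// they receive the lowest ranks.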
fn sort_scores<T, U>(vec: &mut [(T, U, Float)]) {
vec.sort_unstable_by(|r1, r2| {
(r1.2).partial_cmp(&r2.2).unwrap_or_else(|| {
if r1.2.is_nan() {
if r2.2.is_nan() {
Ordering::Equal
} else {
Ordering::Less
}
} else {
Ordering::Greater
}
})
});
}