use core::f64;
use std::slice::from_ref;
use nalgebra::{
ComplexField, DMatrix, DMatrixView, DVector, DVectorView, Dyn, Matrix, RowDVector, Scalar,
ViewStorage,
};
use rand::Rng;
use rand_distr::StandardNormal;
pub mod test_functions;
fn bc_element<T: Scalar>(
elem: &T,
nrows: usize,
ncols: usize,
) -> Matrix<T, Dyn, Dyn, ViewStorage<'_, T, Dyn, Dyn, Dyn, Dyn>> {
DMatrixView::from_slice_with_strides(from_ref(elem), nrows, ncols, 0, 0)
}
fn bc_column<T: Scalar>(
vec: &DVector<T>,
ncols: usize,
) -> Matrix<T, Dyn, Dyn, ViewStorage<'_, T, Dyn, Dyn, Dyn, Dyn>> {
DMatrixView::from_slice_with_strides(vec.as_slice(), vec.len(), ncols, 1, 0)
}
fn bc_row<T: Scalar>(
vec: &RowDVector<T>,
nrows: usize,
) -> Matrix<T, Dyn, Dyn, ViewStorage<'_, T, Dyn, Dyn, Dyn, Dyn>> {
DMatrixView::from_slice_with_strides(vec.as_slice(), nrows, vec.len(), 0, 1)
}
pub fn rec_lamb(dim: usize) -> usize {
let x = ((dim as f64).ln() * 3.0).floor() as usize;
if x % 2 == 0 {
x + 4
} else {
x + 5
}
}
fn cexp(a: f64) -> f64 {
a.min(100.0).exp() }
fn f(a: f64, dim: usize) -> f64 {
((1. + a * a) * cexp(a * a / 2.) / 0.24) - 10. - dim as f64
}
fn f_prime(a: f64) -> f64 {
(1. / 0.24) * a * cexp(a * a / 2.) * (3. + a * a)
}
fn get_h_inv(dim: usize) -> f64 {
let mut h_inv = 1.0;
while (f(h_inv, dim)).abs() > 1e-10 {
h_inv = h_inv - 0.5 * (f(h_inv, dim) / f_prime(h_inv));
}
h_inv
}
fn num_feasible(evals: &[f64]) -> usize {
evals.iter().filter(|e| e.is_finite()).count()
}
fn sort_indices_by(evals: &[f64], z: DMatrixView<f64>) -> Vec<usize> {
let lam = evals.len();
let distances: Vec<f64> = (0..lam).map(|i| z.column(i).norm_squared()).collect();
sort_index(evals, &distances)
}
fn sort_index(primary: &[f64], secondary: &[f64]) -> Vec<usize> {
assert_eq!(primary.len(), secondary.len());
let mut indices: Vec<usize> = (0..primary.len()).collect();
indices.sort_unstable_by(
|a, b| match (primary[*a].is_finite(), primary[*b].is_finite()) {
(true, true) => primary[*a].total_cmp(&primary[*b]),
(true, false) => std::cmp::Ordering::Less,
(false, true) => std::cmp::Ordering::Greater,
(false, false) => secondary[*a].total_cmp(&secondary[*b]),
},
);
indices
}
#[derive(Clone, Debug)]
#[allow(non_snake_case)]
struct State {
sigma: f64,
m: DVector<f64>,
D: DVector<f64>,
v: DVector<f64>,
pc: DVector<f64>,
ps: DVector<f64>,
g: usize,
}
#[derive(Clone, Debug)]
pub struct CrfmnesOptimizer {
dim: usize,
lamb: usize,
w_rank_hat: DVector<f64>,
w_rank: DVector<f64>,
mueff: f64,
cs: f64,
cc: f64,
c1_cma: f64,
chi_n: f64,
h_inv: f64,
eta_m: f64,
eta_move_sigma: f64,
state: State,
}
impl CrfmnesOptimizer {
#[allow(non_snake_case)]
pub fn new<R: Rng>(m: DVector<f64>, sigma: f64, lamb: usize, rand: &mut R) -> Self {
let dim = m.len();
let v = DVector::from_fn(dim, |_, _| {
rand.sample::<f64, _>(StandardNormal) / (dim as f64).sqrt()
});
let D = DVector::from_element(dim, 1.0);
Self::with_v_D(m, sigma, v, D, lamb)
}
#[allow(non_snake_case)]
pub fn with_v_D(
m: DVector<f64>,
sigma: f64,
v: DVector<f64>,
D: DVector<f64>,
lamb: usize,
) -> Self {
let lamb = lamb + lamb % 2;
assert!(!m.is_empty());
assert!(lamb >= 4);
assert!(sigma > 0.0);
assert_eq!(m.len(), v.len());
assert_eq!(m.len(), D.len());
let dim = m.len();
let mu = lamb / 2;
let w_rank_hat = DVector::from_fn(lamb, |row, _| {
((mu as f64 + 1.0).ln() - ((row + 1) as f64).ln()).max(0.0)
});
let w_rank: DVector<f64> = (w_rank_hat.clone() / w_rank_hat.sum())
- DVectorView::from_slice_with_strides(&[1. / lamb as f64], lamb, 0, 0);
let mueff = 1.
/ w_rank.fold(0.0, |acc, e| {
let q = e + (1. / lamb as f64);
acc + q * q
});
let cs = (mueff + 2.) / (dim as f64 + mueff + 5.);
let cc = (4. + mueff / dim as f64) / (dim as f64 + 4. + 2. * mueff / dim as f64);
let c1_cma = 2. / ((dim as f64 + 1.3).powi(2) + mueff);
let chi_n = (dim as f64).sqrt()
* (1.0 - 1.0 / (4.0 * dim as f64) + 1.0 / (21.0 * dim as f64 * dim as f64));
let pc = DVector::zeros(dim);
let ps = DVector::zeros(dim);
let h_inv = get_h_inv(dim);
let eta_m = 1.0;
let eta_move_sigma = 1.0;
Self {
dim,
lamb,
w_rank_hat,
w_rank,
mueff,
cs,
cc,
c1_cma,
chi_n,
h_inv,
eta_m,
eta_move_sigma,
state: State {
sigma,
m,
D,
v,
pc,
ps,
g: 0,
},
}
}
pub fn ask<'a, R: Rng>(&'a mut self, rand: &mut R) -> Trial<'a> {
let State {
sigma,
ref m,
ref D,
ref v,
..
} = &self.state;
let mut z = DMatrix::<f64>::zeros(self.dim, self.lamb);
for i in 0..self.lamb / 2 {
for j in 0..self.dim {
let val: f64 = rand.sample(StandardNormal);
*z.get_mut((j, i)).unwrap() = val;
*z.get_mut((j, i + self.lamb / 2)).unwrap() = -val;
}
}
let normv2 = v.norm_squared();
let normv = normv2.sqrt();
let vbar = v / normv;
let y = &z + (((1.0 + normv2).sqrt() - 1.0) * (&vbar * (vbar.transpose() * &z)));
let x = *sigma * y.component_mul(&bc_column(D, self.lamb)) + bc_column(m, self.lamb);
Trial { opt: self, z, y, x }
}
pub fn update_count(&self) -> usize {
self.state.g
}
pub fn lamb(&self) -> usize {
self.lamb
}
pub fn dim(&self) -> usize {
self.dim
}
fn c1(&self, lamb_feas: usize) -> f64 {
self.c1_cma * (self.dim.saturating_sub(6) + 1) as f64 / 6.0
* (lamb_feas as f64 / self.lamb as f64)
}
#[allow(non_snake_case)]
fn eta_B(&self, lamb_feas: usize) -> f64 {
(((0.02 * lamb_feas as f64).min(3.0 * (self.dim as f64).ln()) + 5.0)
/ (0.23 * self.dim as f64 + 25.0))
.tanh()
}
fn alpha_dist(&self, lamb_feas: usize) -> f64 {
self.h_inv
* ((self.lamb as f64) / self.dim as f64).sqrt().min(1.0)
* (lamb_feas as f64 / self.lamb as f64).sqrt()
}
fn w_dist_hat(&self, z: DVectorView<f64>, lamb_feas: usize) -> f64 {
cexp(self.alpha_dist(lamb_feas) * z.norm())
}
fn eta_stag_sigma(&self, lamb_feas: usize) -> f64 {
((0.024 * lamb_feas as f64 + 0.7 * self.dim as f64 + 20.) / (self.dim as f64 + 12.)).tanh()
}
fn eta_conv_sigma(&self, lamb_feas: usize) -> f64 {
2. * ((0.025 * lamb_feas as f64 + 0.75 * self.dim as f64 + 10.) / (self.dim as f64 + 4.))
.tanh()
}
}
#[derive(Debug, Clone)]
pub enum TrialError {
NoFeasibleSolutions,
DivByZero,
DiagonalInverted,
}
#[derive(Debug)]
pub struct Trial<'a> {
opt: &'a mut CrfmnesOptimizer,
z: DMatrix<f64>,
y: DMatrix<f64>,
x: DMatrix<f64>,
}
impl<'a> Trial<'a> {
pub fn x(&self) -> DMatrixView<f64> {
self.x.as_view()
}
#[allow(non_snake_case)]
pub fn tell(&mut self, evs: Vec<f64>) -> Result<(), TrialError> {
assert_eq!(evs.len(), self.x.ncols());
let lamb_feas = num_feasible(&evs);
if lamb_feas == 0 {
return Err(TrialError::NoFeasibleSolutions);
}
let mut new_state = self.opt.state.clone();
let normv2 = new_state.v.norm_squared();
let normv = normv2.sqrt();
let normv4 = normv2 * normv2;
let vbar = &new_state.v / normv;
let lamb = self.opt.lamb;
let dim = self.opt.dim;
let sorted_indices = sort_indices_by(&evs, self.z.as_view());
let x = DMatrix::from_fn(dim, lamb, |row, col| {
*self.x.get((row, sorted_indices[col])).unwrap()
});
let y = DMatrix::from_fn(dim, lamb, |row, col| {
*self.y.get((row, sorted_indices[col])).unwrap()
});
let z = DMatrix::from_fn(dim, lamb, |row, col| {
*self.z.get((row, sorted_indices[col])).unwrap()
});
new_state.g += 1;
new_state.ps = (1.0 - self.opt.cs) * new_state.ps
+ (&z * &self.opt.w_rank) * (self.opt.cs * (2. - self.opt.cs) * self.opt.mueff).sqrt();
let ps_norm = new_state.ps.norm();
let weights_dist: DVector<f64> = {
let mut w_tmp: Vec<f64> = (0..lamb)
.map(|k| self.opt.w_rank_hat[k] * self.opt.w_dist_hat(z.column(k), lamb_feas))
.collect();
let sum: f64 = w_tmp.iter().sum();
for e in &mut w_tmp {
*e = (*e / sum) - 1. / lamb as f64;
}
DVector::from_vec(w_tmp)
};
let weights: DVectorView<f64> = if ps_norm >= self.opt.chi_n {
weights_dist.as_view()
} else {
self.opt.w_rank.as_view()
};
let eta_sigma = if ps_norm >= self.opt.chi_n {
self.opt.eta_move_sigma
} else if ps_norm >= 0.1 * self.opt.chi_n {
self.opt.eta_stag_sigma(lamb_feas)
} else {
self.opt.eta_conv_sigma(lamb_feas)
};
let wxm: DVector<f64> = (x - bc_column(&new_state.m, lamb)) * weights;
new_state.pc = (1. - self.opt.cc) * &new_state.pc
+ (self.opt.cc * (2. - self.opt.cc) * self.opt.mueff).sqrt() / new_state.sigma * &wxm;
new_state.m += self.opt.eta_m * wxm;
let exY = DMatrix::from_fn(self.opt.dim, lamb + 1, |r, c| {
if c < lamb {
*y.get((r, c)).unwrap()
} else {
*new_state.pc.get(r).unwrap() / new_state.D.get(r).unwrap()
}
});
let yy: DMatrix<f64> = exY.map(|e| e * e);
let ip_yvbar: RowDVector<f64> = vbar.transpose() * &exY;
let vbar_bc = bc_column(&vbar, lamb + 1);
let yvbar: DMatrix<f64> = exY.component_mul(&vbar_bc); let gammav: f64 = 1. + normv2;
let vbarbar: DVector<f64> = vbar.map(|e| e * e);
let alphavd: f64 = 1.0f64
.min((normv4 + (2.0 * gammav - gammav.sqrt()) / vbarbar.max()).sqrt() / (2. + normv2));
let ibg: RowDVector<f64> = ip_yvbar.map(|e| e * e + gammav); let mut t: DMatrix<f64> = (exY.component_mul(&bc_row(&ip_yvbar, dim)))
- (vbar_bc.component_mul(&bc_row(&ibg, dim))) / 2.;
let b: f64 = -(1.0 - alphavd * alphavd) * normv4 / gammav + 2.0 * alphavd * alphavd;
let H: DVector<f64> =
DVector::from_element(self.opt.dim, 2.0) - (b + 2.0 * alphavd * alphavd) * &vbarbar; let invH: DVector<f64> = H.map(|e| 1.0 / e); let s_step1: DMatrix<f64> = yy
- normv2 / gammav * (yvbar.component_mul(&bc_row(&ip_yvbar, dim)))
- bc_element(&1.0, dim, lamb + 1);
let ip_vbart: RowDVector<f64> = vbar.transpose() * &t; let s_step2: DMatrix<f64> = s_step1
- (alphavd / gammav
* ((2.0 + normv2) * (t.component_mul(&vbar_bc))
- (normv2 * (&vbarbar * ip_vbart))));
let invHvbarbar: DVector<f64> = invH.component_mul(&vbarbar);
let ip_s_step2invHvbarbar: RowDVector<f64> = invHvbarbar.transpose() * &s_step2;
let div: f64 = 1.0 + b * (vbarbar.transpose() * &invHvbarbar).as_scalar();
if div.abs() < 1e-10 {
return Err(TrialError::DivByZero);
}
let s: DMatrix<f64> = (s_step2.component_mul(&bc_column(&invH, lamb + 1)))
- ((b / div) * (invHvbarbar * ip_s_step2invHvbarbar));
let ip_svbarbar: RowDVector<f64> = vbarbar.transpose() * &s; t -= alphavd * ((2.0 + normv2) * (s.component_mul(&vbar_bc)) - (&vbar * ip_svbarbar));
let mut exw = DVector::zeros(lamb + 1);
let eta_B = self.opt.eta_B(lamb_feas);
for k in 0..lamb {
exw[k] = eta_B * weights[k];
}
exw[lamb] = self.opt.c1(lamb_feas);
new_state.v += (t * &exw) / normv;
new_state.D += (s * &exw).component_mul(&new_state.D);
if new_state.D.min() < 0.0 {
return Err(TrialError::DiagonalInverted);
}
let nthrootdetA = cexp(
new_state.D.map(|e| e.ln()).sum() / dim as f64
+ (1.0 + (new_state.v.transpose() * &new_state.v).as_scalar()).ln()
/ (2.0 * dim as f64),
);
new_state.D = new_state.D.map(|e| e / nthrootdetA);
let G_s = ((z.map(|e| e * e) - bc_element(&1.0, dim, lamb)) * weights).sum() / dim as f64;
new_state.sigma *= cexp(eta_sigma / 2.0 * G_s);
self.opt.state = new_state;
Ok(())
}
}