1use core::f64;
21use std::slice::from_ref;
22
23use nalgebra::{
24 ComplexField, DMatrix, DMatrixView, DVector, DVectorView, Dyn, Matrix, RowDVector, Scalar,
25 ViewStorage,
26};
27
28use rand::Rng;
29use rand_distr::StandardNormal;
30
31pub mod test_functions;
32
33fn bc_element<T: Scalar>(
35 elem: &T,
36 nrows: usize,
37 ncols: usize,
38) -> Matrix<T, Dyn, Dyn, ViewStorage<'_, T, Dyn, Dyn, Dyn, Dyn>> {
39 DMatrixView::from_slice_with_strides(from_ref(elem), nrows, ncols, 0, 0)
40}
41
42fn bc_column<T: Scalar>(
44 vec: &DVector<T>,
45 ncols: usize,
46) -> Matrix<T, Dyn, Dyn, ViewStorage<'_, T, Dyn, Dyn, Dyn, Dyn>> {
47 DMatrixView::from_slice_with_strides(vec.as_slice(), vec.len(), ncols, 1, 0)
48}
49
50fn bc_row<T: Scalar>(
52 vec: &RowDVector<T>,
53 nrows: usize,
54) -> Matrix<T, Dyn, Dyn, ViewStorage<'_, T, Dyn, Dyn, Dyn, Dyn>> {
55 DMatrixView::from_slice_with_strides(vec.as_slice(), nrows, vec.len(), 0, 1)
56}
57
58pub fn rec_lamb(dim: usize) -> usize {
62 let x = ((dim as f64).ln() * 3.0).floor() as usize;
63 if x % 2 == 0 {
64 x + 4
65 } else {
66 x + 5
67 }
68}
69
70fn cexp(a: f64) -> f64 {
71 a.min(100.0).exp() }
73
74fn f(a: f64, dim: usize) -> f64 {
75 ((1. + a * a) * cexp(a * a / 2.) / 0.24) - 10. - dim as f64
76}
77
78fn f_prime(a: f64) -> f64 {
79 (1. / 0.24) * a * cexp(a * a / 2.) * (3. + a * a)
80}
81
82fn get_h_inv(dim: usize) -> f64 {
83 let mut h_inv = 1.0;
84 while (f(h_inv, dim)).abs() > 1e-10 {
85 h_inv = h_inv - 0.5 * (f(h_inv, dim) / f_prime(h_inv));
86 }
87 h_inv
88}
89
90fn num_feasible(evals: &[f64]) -> usize {
91 evals.iter().filter(|e| e.is_finite()).count()
92}
93
94fn sort_indices_by(evals: &[f64], z: DMatrixView<f64>) -> Vec<usize> {
95 let lam = evals.len();
96
97 let distances: Vec<f64> = (0..lam).map(|i| z.column(i).norm_squared()).collect();
98 sort_index(evals, &distances)
99}
100
101fn sort_index(primary: &[f64], secondary: &[f64]) -> Vec<usize> {
104 assert_eq!(primary.len(), secondary.len());
105 let mut indices: Vec<usize> = (0..primary.len()).collect();
106
107 indices.sort_unstable_by(
108 |a, b| match (primary[*a].is_finite(), primary[*b].is_finite()) {
109 (true, true) => primary[*a].total_cmp(&primary[*b]),
110 (true, false) => std::cmp::Ordering::Less,
111 (false, true) => std::cmp::Ordering::Greater,
112 (false, false) => secondary[*a].total_cmp(&secondary[*b]),
113 },
114 );
115 indices
116}
117
118#[derive(Clone, Debug)]
119#[allow(non_snake_case)]
120struct State {
121 sigma: f64,
122
123 m: DVector<f64>,
125
126 D: DVector<f64>,
128
129 v: DVector<f64>,
131
132 pc: DVector<f64>,
134
135 ps: DVector<f64>,
137
138 g: usize,
140}
141
142#[derive(Clone, Debug)]
184pub struct CrfmnesOptimizer {
185 dim: usize,
187
188 lamb: usize,
190
191 w_rank_hat: DVector<f64>,
192 w_rank: DVector<f64>,
193
194 mueff: f64,
195 cs: f64,
196 cc: f64,
197 c1_cma: f64,
198
199 chi_n: f64,
201
202 h_inv: f64,
204
205 eta_m: f64,
207 eta_move_sigma: f64,
208
209 state: State,
210}
211
212impl CrfmnesOptimizer {
213 #[allow(non_snake_case)]
219 pub fn new<R: Rng>(m: DVector<f64>, sigma: f64, lamb: usize, rand: &mut R) -> Self {
220 let dim = m.len();
221
222 let v = DVector::from_fn(dim, |_, _| {
223 rand.sample::<f64, _>(StandardNormal) / (dim as f64).sqrt()
224 });
225 let D = DVector::from_element(dim, 1.0);
226
227 Self::with_v_D(m, sigma, v, D, lamb)
228 }
229
230 #[allow(non_snake_case)]
246 pub fn with_v_D(
247 m: DVector<f64>,
248 sigma: f64,
249 v: DVector<f64>,
250 D: DVector<f64>,
251 lamb: usize,
252 ) -> Self {
253 let lamb = lamb + lamb % 2;
254 assert!(!m.is_empty());
255 assert!(lamb >= 4);
256 assert!(sigma > 0.0);
257 assert_eq!(m.len(), v.len());
258 assert_eq!(m.len(), D.len());
259
260 let dim = m.len();
261 let mu = lamb / 2;
262 let w_rank_hat = DVector::from_fn(lamb, |row, _| {
263 ((mu as f64 + 1.0).ln() - ((row + 1) as f64).ln()).max(0.0)
264 });
265
266 let w_rank: DVector<f64> = (w_rank_hat.clone() / w_rank_hat.sum())
267 - DVectorView::from_slice_with_strides(&[1. / lamb as f64], lamb, 0, 0);
268
269 let mueff = 1.
270 / w_rank.fold(0.0, |acc, e| {
271 let q = e + (1. / lamb as f64);
272 acc + q * q
273 });
274
275 let cs = (mueff + 2.) / (dim as f64 + mueff + 5.);
276 let cc = (4. + mueff / dim as f64) / (dim as f64 + 4. + 2. * mueff / dim as f64);
277 let c1_cma = 2. / ((dim as f64 + 1.3).powi(2) + mueff);
278 let chi_n = (dim as f64).sqrt()
280 * (1.0 - 1.0 / (4.0 * dim as f64) + 1.0 / (21.0 * dim as f64 * dim as f64));
281 let pc = DVector::zeros(dim);
282 let ps = DVector::zeros(dim);
283 let h_inv = get_h_inv(dim);
285 let eta_m = 1.0;
287 let eta_move_sigma = 1.0;
288
289 Self {
290 dim,
291
292 lamb,
293
294 w_rank_hat,
295 w_rank,
296 mueff,
297 cs,
298 cc,
299 c1_cma,
300 chi_n,
301
302 h_inv,
303 eta_m,
304 eta_move_sigma,
305
306 state: State {
307 sigma,
308
309 m,
310 D,
311 v,
312
313 pc,
314 ps,
315
316 g: 0,
317 },
318 }
319 }
320
321 pub fn ask<'a, R: Rng>(&'a mut self, rand: &mut R) -> Trial<'a> {
323 let State {
324 sigma,
325 ref m,
326 ref D,
327 ref v,
328 ..
329 } = &self.state;
330
331 let mut z = DMatrix::<f64>::zeros(self.dim, self.lamb);
332
333 for i in 0..self.lamb / 2 {
334 for j in 0..self.dim {
335 let val: f64 = rand.sample(StandardNormal);
336 *z.get_mut((j, i)).unwrap() = val;
337 *z.get_mut((j, i + self.lamb / 2)).unwrap() = -val;
338 }
339 }
340
341 let normv2 = v.norm_squared();
342 let normv = normv2.sqrt();
343 let vbar = v / normv;
344 let y = &z + (((1.0 + normv2).sqrt() - 1.0) * (&vbar * (vbar.transpose() * &z)));
345
346 let x = *sigma * y.component_mul(&bc_column(D, self.lamb)) + bc_column(m, self.lamb);
347
348 Trial { opt: self, z, y, x }
349 }
350
351 pub fn update_count(&self) -> usize {
353 self.state.g
354 }
355
356 pub fn lamb(&self) -> usize {
358 self.lamb
359 }
360
361 pub fn dim(&self) -> usize {
363 self.dim
364 }
365
366 fn c1(&self, lamb_feas: usize) -> f64 {
367 self.c1_cma * (self.dim.saturating_sub(6) + 1) as f64 / 6.0
368 * (lamb_feas as f64 / self.lamb as f64)
369 }
370
371 #[allow(non_snake_case)]
372 fn eta_B(&self, lamb_feas: usize) -> f64 {
373 (((0.02 * lamb_feas as f64).min(3.0 * (self.dim as f64).ln()) + 5.0)
374 / (0.23 * self.dim as f64 + 25.0))
375 .tanh()
376 }
377
378 fn alpha_dist(&self, lamb_feas: usize) -> f64 {
379 self.h_inv
380 * ((self.lamb as f64) / self.dim as f64).sqrt().min(1.0)
381 * (lamb_feas as f64 / self.lamb as f64).sqrt()
382 }
383
384 fn w_dist_hat(&self, z: DVectorView<f64>, lamb_feas: usize) -> f64 {
385 cexp(self.alpha_dist(lamb_feas) * z.norm())
386 }
387
388 fn eta_stag_sigma(&self, lamb_feas: usize) -> f64 {
389 ((0.024 * lamb_feas as f64 + 0.7 * self.dim as f64 + 20.) / (self.dim as f64 + 12.)).tanh()
390 }
391
392 fn eta_conv_sigma(&self, lamb_feas: usize) -> f64 {
393 2. * ((0.025 * lamb_feas as f64 + 0.75 * self.dim as f64 + 10.) / (self.dim as f64 + 4.))
394 .tanh()
395 }
396}
397
398#[derive(Debug, Clone)]
402pub enum TrialError {
403 NoFeasibleSolutions,
405 DivByZero,
407 DiagonalInverted,
409}
410
411#[derive(Debug)]
413pub struct Trial<'a> {
414 opt: &'a mut CrfmnesOptimizer,
415
416 z: DMatrix<f64>,
421
422 y: DMatrix<f64>,
427
428 x: DMatrix<f64>,
432}
433
434impl<'a> Trial<'a> {
435 pub fn x(&self) -> DMatrixView<f64> {
437 self.x.as_view()
438 }
439
440 #[allow(non_snake_case)]
449 pub fn tell(&mut self, evs: Vec<f64>) -> Result<(), TrialError> {
450 assert_eq!(evs.len(), self.x.ncols());
453
454 let lamb_feas = num_feasible(&evs);
456
457 if lamb_feas == 0 {
458 return Err(TrialError::NoFeasibleSolutions);
459 }
460
461 let mut new_state = self.opt.state.clone();
462
463 let normv2 = new_state.v.norm_squared();
464 let normv = normv2.sqrt();
465 let normv4 = normv2 * normv2;
466
467 let vbar = &new_state.v / normv;
468
469 let lamb = self.opt.lamb;
470 let dim = self.opt.dim;
471
472 let sorted_indices = sort_indices_by(&evs, self.z.as_view());
473
474 let x = DMatrix::from_fn(dim, lamb, |row, col| {
475 *self.x.get((row, sorted_indices[col])).unwrap()
476 });
477 let y = DMatrix::from_fn(dim, lamb, |row, col| {
478 *self.y.get((row, sorted_indices[col])).unwrap()
479 });
480 let z = DMatrix::from_fn(dim, lamb, |row, col| {
481 *self.z.get((row, sorted_indices[col])).unwrap()
482 });
483
484 new_state.g += 1;
485
486 new_state.ps = (1.0 - self.opt.cs) * new_state.ps
488 + (&z * &self.opt.w_rank) * (self.opt.cs * (2. - self.opt.cs) * self.opt.mueff).sqrt();
489 let ps_norm = new_state.ps.norm();
490
491 let weights_dist: DVector<f64> = {
493 let mut w_tmp: Vec<f64> = (0..lamb)
494 .map(|k| self.opt.w_rank_hat[k] * self.opt.w_dist_hat(z.column(k), lamb_feas))
495 .collect();
496 let sum: f64 = w_tmp.iter().sum();
497 for e in &mut w_tmp {
498 *e = (*e / sum) - 1. / lamb as f64;
499 }
500 DVector::from_vec(w_tmp)
501 };
502
503 let weights: DVectorView<f64> = if ps_norm >= self.opt.chi_n {
505 weights_dist.as_view()
506 } else {
507 self.opt.w_rank.as_view()
508 };
509 let eta_sigma = if ps_norm >= self.opt.chi_n {
510 self.opt.eta_move_sigma
511 } else if ps_norm >= 0.1 * self.opt.chi_n {
512 self.opt.eta_stag_sigma(lamb_feas)
513 } else {
514 self.opt.eta_conv_sigma(lamb_feas)
515 };
516
517 let wxm: DVector<f64> = (x - bc_column(&new_state.m, lamb)) * weights;
519 new_state.pc = (1. - self.opt.cc) * &new_state.pc
520 + (self.opt.cc * (2. - self.opt.cc) * self.opt.mueff).sqrt() / new_state.sigma * &wxm;
521 new_state.m += self.opt.eta_m * wxm;
522
523 let exY = DMatrix::from_fn(self.opt.dim, lamb + 1, |r, c| {
526 if c < lamb {
527 *y.get((r, c)).unwrap()
528 } else {
529 *new_state.pc.get(r).unwrap() / new_state.D.get(r).unwrap()
530 }
531 }); let yy: DMatrix<f64> = exY.map(|e| e * e); let ip_yvbar: RowDVector<f64> = vbar.transpose() * &exY; let vbar_bc = bc_column(&vbar, lamb + 1); let yvbar: DMatrix<f64> = exY.component_mul(&vbar_bc); let gammav: f64 = 1. + normv2;
541 let vbarbar: DVector<f64> = vbar.map(|e| e * e);
542 let alphavd: f64 = 1.0f64
543 .min((normv4 + (2.0 * gammav - gammav.sqrt()) / vbarbar.max()).sqrt() / (2. + normv2)); let ibg: RowDVector<f64> = ip_yvbar.map(|e| e * e + gammav); let mut t: DMatrix<f64> = (exY.component_mul(&bc_row(&ip_yvbar, dim)))
547 - (vbar_bc.component_mul(&bc_row(&ibg, dim))) / 2.; let b: f64 = -(1.0 - alphavd * alphavd) * normv4 / gammav + 2.0 * alphavd * alphavd;
550 let H: DVector<f64> =
551 DVector::from_element(self.opt.dim, 2.0) - (b + 2.0 * alphavd * alphavd) * &vbarbar; let invH: DVector<f64> = H.map(|e| 1.0 / e); let s_step1: DMatrix<f64> = yy
554 - normv2 / gammav * (yvbar.component_mul(&bc_row(&ip_yvbar, dim)))
555 - bc_element(&1.0, dim, lamb + 1); let ip_vbart: RowDVector<f64> = vbar.transpose() * &t; let s_step2: DMatrix<f64> = s_step1
559 - (alphavd / gammav
560 * ((2.0 + normv2) * (t.component_mul(&vbar_bc))
561 - (normv2 * (&vbarbar * ip_vbart)))); let invHvbarbar: DVector<f64> = invH.component_mul(&vbarbar);
564 let ip_s_step2invHvbarbar: RowDVector<f64> = invHvbarbar.transpose() * &s_step2; let div: f64 = 1.0 + b * (vbarbar.transpose() * &invHvbarbar).as_scalar();
567 if div.abs() < 1e-10 {
568 return Err(TrialError::DivByZero);
569 }
570
571 let s: DMatrix<f64> = (s_step2.component_mul(&bc_column(&invH, lamb + 1)))
572 - ((b / div) * (invHvbarbar * ip_s_step2invHvbarbar)); let ip_svbarbar: RowDVector<f64> = vbarbar.transpose() * &s; t -= alphavd * ((2.0 + normv2) * (s.component_mul(&vbar_bc)) - (&vbar * ip_svbarbar)); let mut exw = DVector::zeros(lamb + 1);
579 let eta_B = self.opt.eta_B(lamb_feas);
580 for k in 0..lamb {
581 exw[k] = eta_B * weights[k];
582 }
583 exw[lamb] = self.opt.c1(lamb_feas);
584
585 new_state.v += (t * &exw) / normv;
586 new_state.D += (s * &exw).component_mul(&new_state.D);
587
588 if new_state.D.min() < 0.0 {
590 return Err(TrialError::DiagonalInverted);
591 }
592 let nthrootdetA = cexp(
593 new_state.D.map(|e| e.ln()).sum() / dim as f64
594 + (1.0 + (new_state.v.transpose() * &new_state.v).as_scalar()).ln()
595 / (2.0 * dim as f64),
596 );
597
598 new_state.D = new_state.D.map(|e| e / nthrootdetA);
599
600 let G_s = ((z.map(|e| e * e) - bc_element(&1.0, dim, lamb)) * weights).sum() / dim as f64;
602 new_state.sigma *= cexp(eta_sigma / 2.0 * G_s);
603
604 self.opt.state = new_state;
606
607 Ok(())
608 }
609}