use crate::error::OptimizeError;
use scirs2_core::ndarray::{Array1, Array2, Axis};
use scirs2_core::random::rand_distributions::Distribution;
use scirs2_core::random::rngs::StdRng;
use scirs2_core::random::{Normal, SeedableRng};
/// Configuration for the CMA-ES optimizer.
#[derive(Debug, Clone)]
pub struct CmaEsConfig {
    /// Population size (lambda). `None` selects the standard default
    /// `4 + floor(3 ln n)` derived from the problem dimension.
    pub population_size: Option<usize>,
    /// Initial global step size (sigma) of the search distribution.
    pub initial_sigma: f64,
    /// Maximum number of objective-function evaluations.
    pub max_fevals: usize,
    /// Function-value convergence tolerance (TolFun-style criterion).
    pub ftol: f64,
    /// Step-size tolerance: the run is considered converged once
    /// sigma falls below this value.
    pub xtol: f64,
    /// RNG seed; identical seeds give reproducible runs.
    pub seed: u64,
}
impl Default for CmaEsConfig {
fn default() -> Self {
Self {
population_size: None,
initial_sigma: 0.3,
max_fevals: 10_000,
ftol: 1e-10,
xtol: 1e-10,
seed: 0,
}
}
}
/// Outcome of a CMA-ES run.
#[derive(Debug, Clone)]
pub struct CmaEsResult {
    /// Best point found (clipped to the bounds when bounds were given).
    pub x: Array1<f64>,
    /// Objective value at `x`.
    pub fun: f64,
    /// Total number of objective-function evaluations performed.
    pub fevals: usize,
    /// Number of completed generation updates.
    pub generations: usize,
    /// `true` when a convergence criterion stopped the run, `false`
    /// when the evaluation budget was exhausted.
    pub converged: bool,
    /// Human-readable description of why the run terminated.
    pub message: String,
}
/// Internal CMA-ES optimizer state: strategy parameters (fixed at
/// construction) plus the evolving search-distribution state.
pub struct CmaEs {
    /// Problem dimension.
    n: usize,
    /// Population size (lambda).
    lambda: usize,
    /// Number of parents used in recombination (lambda / 2).
    mu: usize,
    /// Positive recombination weights for the `mu` best samples (sum to 1).
    weights: Vec<f64>,
    /// Variance-effective selection mass: 1 / sum(weights^2).
    mueff: f64,
    /// Cumulation rate for the step-size evolution path.
    cs: f64,
    /// Damping factor for step-size adaptation.
    ds: f64,
    /// Expected norm of an n-dimensional standard-normal vector.
    chi_n: f64,
    /// Cumulation rate for the covariance evolution path.
    cc: f64,
    /// Rank-one covariance learning rate.
    c1: f64,
    /// Rank-mu covariance learning rate.
    cmu: f64,
    /// Current mean of the search distribution.
    mean: Array1<f64>,
    /// Global step size.
    sigma: f64,
    /// Evolution path for step-size control.
    ps: Array1<f64>,
    /// Evolution path for covariance adaptation.
    pc: Array1<f64>,
    /// Covariance matrix of the search distribution.
    cov: Array2<f64>,
    /// Cached eigenvalues of `cov` (floored to stay positive).
    eigenvalues: Array1<f64>,
    /// Cached eigenvectors of `cov`, stored as columns.
    eigenvectors: Array2<f64>,
    /// Seeded RNG driving the sampling.
    rng: StdRng,
    /// Standard normal N(0, 1) distribution used for sampling.
    normal_dist: Normal<f64>,
    /// Running count of objective evaluations (incremented by the driver).
    pub fevals: usize,
    /// Number of completed generation updates.
    pub generations: usize,
}
impl CmaEs {
    /// Initialize CMA-ES state from a starting point and configuration.
    ///
    /// Derives the standard strategy parameters (selection weights,
    /// learning rates, damping) from the problem dimension and the
    /// population size, following the usual CMA-ES defaults.
    ///
    /// # Errors
    /// Returns `OptimizeError::InvalidInput` when `x0` is empty or the
    /// effective population size is below 2, and
    /// `OptimizeError::InitializationError` if the normal distribution
    /// cannot be constructed.
    pub fn new(x0: Array1<f64>, config: &CmaEsConfig) -> Result<Self, OptimizeError> {
        let n = x0.len();
        if n == 0 {
            return Err(OptimizeError::InvalidInput(
                "dimension must be > 0".to_string(),
            ));
        }
        // Default population size: 4 + floor(3 ln n).
        let lambda = config
            .population_size
            .unwrap_or_else(|| 4 + (3.0 * (n as f64).ln()) as usize);
        if lambda < 2 {
            return Err(OptimizeError::InvalidInput(
                "population_size must be >= 2".to_string(),
            ));
        }
        // Recombine from the best half of the population.
        let mu = lambda / 2;
        // Log-decreasing positive weights, normalized to sum to 1.
        let raw_weights: Vec<f64> = (1..=mu)
            .map(|i| ((mu as f64 + 0.5) / i as f64).ln())
            .collect();
        let w_sum: f64 = raw_weights.iter().sum();
        let weights: Vec<f64> = raw_weights.iter().map(|&w| w / w_sum).collect();
        // Variance-effective selection mass.
        let mueff = 1.0 / weights.iter().map(|&w| w * w).sum::<f64>();
        // Step-size control: cumulation constant and damping.
        let cs = (mueff + 2.0) / (n as f64 + mueff + 5.0);
        let ds = 1.0 + 2.0 * (0.0f64.max((mueff - 1.0) / (n as f64 + 1.0) - 1.0)).sqrt() + cs;
        // Approximation of E||N(0, I)|| in n dimensions.
        let chi_n =
            (n as f64).sqrt() * (1.0 - 1.0 / (4.0 * n as f64) + 1.0 / (21.0 * n as f64 * n as f64));
        // Covariance adaptation: path cumulation constant, rank-one and
        // rank-mu learning rates (cmu capped so c1 + cmu <= 1).
        let cc = (4.0 + mueff / n as f64) / (n as f64 + 4.0 + 2.0 * mueff / n as f64);
        let c1 = 2.0 / ((n as f64 + 1.3).powi(2) + mueff);
        let alpha_mu = 2.0;
        let cmu = (alpha_mu * (mueff - 2.0 + 1.0 / mueff)
            / ((n as f64 + 2.0).powi(2) + alpha_mu * mueff / 2.0))
            .min(1.0 - c1);
        let normal_dist =
            Normal::new(0.0, 1.0).map_err(|e| OptimizeError::InitializationError(e.to_string()))?;
        Ok(Self {
            n,
            lambda,
            mu,
            weights,
            mueff,
            cs,
            ds,
            chi_n,
            cc,
            c1,
            cmu,
            mean: x0,
            sigma: config.initial_sigma,
            ps: Array1::zeros(n),
            pc: Array1::zeros(n),
            // Start from an isotropic unit distribution.
            cov: Array2::eye(n),
            eigenvalues: Array1::ones(n),
            eigenvectors: Array2::eye(n),
            rng: StdRng::seed_from_u64(config.seed),
            normal_dist,
            fevals: 0,
            generations: 0,
        })
    }

    /// Draw `lambda` candidates: x = mean + sigma * B * D * z with
    /// z ~ N(0, I), where B holds the cached eigenvectors and D the
    /// square roots of the cached eigenvalues of the covariance matrix.
    fn sample_population(&mut self) -> Vec<Array1<f64>> {
        let mut pop = Vec::with_capacity(self.lambda);
        for _ in 0..self.lambda {
            let z: Array1<f64> = (0..self.n)
                .map(|_| self.normal_dist.sample(&mut self.rng))
                .collect::<Vec<f64>>()
                .into();
            // Scale by the standard deviations along the principal axes,
            // then rotate into problem coordinates.
            let dz: Array1<f64> = &z * &self.eigenvalues.mapv(|v| v.sqrt());
            let bdz = self.eigenvectors.dot(&dz);
            pop.push(&self.mean + &(self.sigma * &bdz));
        }
        pop
    }

    /// Perform one CMA-ES generation update.
    ///
    /// `ranked` pairs each population index with its fitness, sorted
    /// ascending (best first); `population` holds the sampled points.
    /// Updates the mean, both evolution paths, the covariance matrix,
    /// the step size, and the cached eigendecomposition.
    fn update(&mut self, ranked: &[(usize, f64)], population: &[Array1<f64>]) {
        let n = self.n;
        // This is the (generations + 1)-th update; used to normalize ps.
        let gen_f64 = self.generations as f64 + 1.0;
        let old_mean = self.mean.clone();
        // New mean: weighted recombination of the mu best samples.
        let mut new_mean = Array1::zeros(n);
        for (k, &(idx, _)) in ranked[..self.mu].iter().enumerate() {
            new_mean = new_mean + &population[idx] * self.weights[k];
        }
        self.mean = new_mean;
        // Mean shift in sigma-normalized coordinates.
        let mean_diff = (&self.mean - &old_mean) / self.sigma;
        // C^(-1/2) = B * D^(-1/2) * B^T from the cached decomposition;
        // near-zero eigenvalues are zeroed instead of inverted.
        let inv_sqrt_diag: Array1<f64> =
            self.eigenvalues
                .mapv(|v| if v > 1e-14 { v.recip().sqrt() } else { 0.0 });
        let inv_sqrt_c: Array2<f64> = self
            .eigenvectors
            .dot(&Array2::from_diag(&inv_sqrt_diag))
            .dot(&self.eigenvectors.t());
        let invsqrt_diff = inv_sqrt_c.dot(&mean_diff);
        let cs = self.cs;
        // Step-size evolution path.
        self.ps = (1.0 - cs) * &self.ps + (cs * (2.0 - cs) * self.mueff).sqrt() * &invsqrt_diff;
        let ps_norm = self.ps.mapv(|v| v * v).sum().sqrt();
        // Heaviside h_sig: suppress the rank-one path update when ||ps||
        // is unexpectedly large (prevents overly fast axis growth).
        let h_thresh = 1.4 + 2.0 / (n as f64 + 1.0);
        let ps_norm_normalized =
            ps_norm / (1.0 - (1.0 - cs).powf(2.0 * gen_f64)).sqrt() / self.chi_n;
        let h_sig = ps_norm_normalized < h_thresh;
        // Variance-loss correction applied to the covariance when the
        // path update is stalled.
        let delta_h = if h_sig {
            0.0
        } else {
            (2.0 - self.cc) * self.cc
        };
        // Covariance evolution path.
        self.pc = (1.0 - self.cc) * &self.pc
            + if h_sig {
                (self.cc * (2.0 - self.cc) * self.mueff).sqrt() * &mean_diff
            } else {
                Array1::zeros(n)
            };
        // Rank-one update term: pc * pc^T.
        let pc_col = self.pc.view().insert_axis(Axis(1));
        let rank_one: Array2<f64> = pc_col.dot(&pc_col.t());
        // Rank-mu update term: weighted outer products of the mu best
        // steps, in sigma-normalized coordinates.
        let mut rank_mu: Array2<f64> = Array2::zeros((n, n));
        for (k, &(idx, _)) in ranked[..self.mu].iter().enumerate() {
            let diff = (&population[idx] - &old_mean) / self.sigma;
            let diff_col = diff.view().insert_axis(Axis(1));
            rank_mu = rank_mu + self.weights[k] * diff_col.dot(&diff_col.t());
        }
        let c1 = self.c1;
        let cmu = self.cmu;
        // Combined covariance update (decay + rank-one + rank-mu).
        self.cov = (1.0 + c1 * delta_h - c1 - cmu) * &self.cov + c1 * &rank_one + cmu * &rank_mu;
        // Cumulative step-size adaptation: grow sigma when ||ps|| exceeds
        // its expectation under random selection, shrink otherwise.
        let ps_norm_new = self.ps.mapv(|v| v * v).sum().sqrt();
        self.sigma *= ((cs / self.ds) * (ps_norm_new / self.chi_n - 1.0)).exp();
        // Refresh the eigendecomposition for the next sampling round.
        self.update_eigen();
        self.generations += 1;
    }

    /// Recompute the eigendecomposition of the covariance matrix using
    /// cyclic Jacobi rotations (at most 20 sweeps). The matrix is
    /// explicitly symmetrized first, and eigenvalues are floored at
    /// 1e-20 so sampling (which takes their square roots) stays finite.
    fn update_eigen(&mut self) {
        let n = self.n;
        let mut a = self.cov.clone();
        // Symmetrize to remove numerical asymmetry accumulated in updates.
        for i in 0..n {
            for j in (i + 1)..n {
                let sym = (a[[i, j]] + a[[j, i]]) * 0.5;
                a[[i, j]] = sym;
                a[[j, i]] = sym;
            }
        }
        let mut v = Array2::eye(n);
        for _ in 0..20 {
            // Early exit once the off-diagonal mass is negligible.
            let mut off_norm_sq = 0.0;
            for i in 0..n {
                for j in (i + 1)..n {
                    off_norm_sq += a[[i, j]] * a[[i, j]];
                }
            }
            if off_norm_sq < 1e-28 {
                break;
            }
            for p in 0..n {
                for q in (p + 1)..n {
                    let apq = a[[p, q]];
                    if apq.abs() < 1e-15 {
                        continue;
                    }
                    let app = a[[p, p]];
                    let aqq = a[[q, q]];
                    // Classic Jacobi rotation annihilating a[p][q];
                    // t = tan of the rotation angle, chosen for stability.
                    let theta = 0.5 * (aqq - app) / apq;
                    let t = theta.signum() / (theta.abs() + (1.0 + theta * theta).sqrt());
                    let c_r = 1.0 / (1.0 + t * t).sqrt();
                    let s_r = t * c_r;
                    a[[p, p]] = app - t * apq;
                    a[[q, q]] = aqq + t * apq;
                    a[[p, q]] = 0.0;
                    a[[q, p]] = 0.0;
                    // Apply the rotation to the remaining rows/columns,
                    // writing both halves to keep `a` symmetric.
                    for r in 0..n {
                        if r != p && r != q {
                            let arp = a[[r, p]];
                            let arq = a[[r, q]];
                            let new_arp = c_r * arp - s_r * arq;
                            let new_arq = s_r * arp + c_r * arq;
                            a[[r, p]] = new_arp;
                            a[[p, r]] = new_arp;
                            a[[r, q]] = new_arq;
                            a[[q, r]] = new_arq;
                        }
                    }
                    // Accumulate the rotation into the eigenvector matrix.
                    for r in 0..n {
                        let vrp = v[[r, p]];
                        let vrq = v[[r, q]];
                        v[[r, p]] = c_r * vrp - s_r * vrq;
                        v[[r, q]] = s_r * vrp + c_r * vrq;
                    }
                }
            }
        }
        // The diagonal now holds the eigenvalues; floor to keep them
        // strictly positive.
        for i in 0..n {
            self.eigenvalues[i] = a[[i, i]].max(1e-20);
        }
        self.eigenvectors = v;
    }
}
/// Minimize `f` with CMA-ES starting from `x0`.
///
/// Candidate points are clipped to `bounds` (when given) before each
/// objective evaluation; the internal CMA-ES state evolves on the
/// unclipped samples.
///
/// Termination criteria, checked once per generation:
/// - `max_fevals` objective evaluations reached (`converged = false`);
/// - step size sigma drops below `xtol` (`converged = true`);
/// - the fitness spread within the current generation drops below
///   `ftol` (`converged = true`).
///
/// # Errors
/// Returns `OptimizeError::InvalidInput` for an empty `x0`, a bounds
/// slice whose length differs from `x0`, inverted bounds, or an
/// invalid population size.
pub fn minimize_cmaes<F>(
    f: F,
    x0: Array1<f64>,
    bounds: Option<&[(f64, f64)]>,
    config: CmaEsConfig,
) -> Result<CmaEsResult, OptimizeError>
where
    F: Fn(&Array1<f64>) -> f64,
{
    let n = x0.len();
    if n == 0 {
        return Err(OptimizeError::InvalidInput(
            "dimension must be > 0".to_string(),
        ));
    }
    // Validate bounds before doing any work.
    if let Some(b) = bounds {
        if b.len() != n {
            return Err(OptimizeError::InvalidInput(format!(
                "bounds length {} does not match x0 length {}",
                b.len(),
                n
            )));
        }
        for (i, &(lo, hi)) in b.iter().enumerate() {
            if lo > hi {
                return Err(OptimizeError::InvalidInput(format!(
                    "bounds[{}]: lower {} > upper {}",
                    i, lo, hi
                )));
            }
        }
    }
    let max_fevals = config.max_fevals;
    let ftol = config.ftol;
    let xtol = config.xtol;
    let mut state = CmaEs::new(x0.clone(), &config)?;
    // Evaluate the (clipped) starting point so best_x/best_f are always
    // valid even if the budget is exhausted immediately.
    let x0_clipped = clip_to_bounds(&x0, bounds);
    let mut best_f = f(&x0_clipped);
    let mut best_x = x0_clipped;
    state.fevals += 1;
    loop {
        let population = state.sample_population();
        // Evaluate every candidate at its bound-clipped location.
        let mut fitness: Vec<(usize, f64)> = population
            .iter()
            .enumerate()
            .map(|(i, xi)| {
                let clipped = clip_to_bounds(xi, bounds);
                let fval = f(&clipped);
                (i, fval)
            })
            .collect();
        state.fevals += population.len();
        // Rank ascending; NaNs compare as equal so sorting cannot panic.
        fitness.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        let best_this_gen_f = fitness[0].1;
        if best_this_gen_f < best_f {
            best_f = best_this_gen_f;
            best_x = clip_to_bounds(&population[fitness[0].0], bounds);
        }
        if state.fevals >= max_fevals {
            return Ok(CmaEsResult {
                x: best_x,
                fun: best_f,
                fevals: state.fevals,
                generations: state.generations,
                converged: false,
                message: "Maximum function evaluations reached".to_string(),
            });
        }
        if state.sigma < xtol {
            return Ok(CmaEsResult {
                x: best_x,
                fun: best_f,
                fevals: state.fevals,
                generations: state.generations,
                converged: true,
                message: "Step size (sigma) converged below xtol".to_string(),
            });
        }
        // BUGFIX: `config.ftol` was previously never consulted. Declare
        // convergence when the fitness spread within the current
        // generation drops below ftol (the classic TolFun criterion);
        // lambda >= 2 is guaranteed, so `fitness` is never empty.
        let worst_this_gen_f = fitness[fitness.len() - 1].1;
        if (worst_this_gen_f - best_this_gen_f).abs() < ftol {
            return Ok(CmaEsResult {
                x: best_x,
                fun: best_f,
                fevals: state.fevals,
                generations: state.generations,
                converged: true,
                message: "Function value spread converged below ftol".to_string(),
            });
        }
        state.update(&fitness, &population);
    }
}
#[inline]
fn clip_to_bounds(x: &Array1<f64>, bounds: Option<&[(f64, f64)]>) -> Array1<f64> {
match bounds {
None => x.clone(),
Some(b) => {
let clipped: Vec<f64> = x
.iter()
.zip(b.iter())
.map(|(&v, &(lo, hi))| v.clamp(lo, hi))
.collect();
Array1::from(clipped)
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// 1-D sphere: the minimum at x = 0 should be located accurately.
    #[test]
    fn test_cmaes_sphere_1d() {
        let result = minimize_cmaes(
            |x| x[0] * x[0],
            array![5.0],
            None,
            CmaEsConfig {
                max_fevals: 2000,
                initial_sigma: 1.0,
                ftol: 1e-8,
                xtol: 1e-6,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(result.fun < 1e-6, "f* = {}, expected < 1e-6", result.fun);
        assert!(
            result.x[0].abs() < 1e-3,
            "x* = {}, expected near 0",
            result.x[0]
        );
    }

    /// 5-D sphere: checks multi-dimensional convergence.
    #[test]
    fn test_cmaes_sphere_nd() {
        let x0 = array![3.0, -2.0, 1.0, 4.0, -1.0];
        let result = minimize_cmaes(
            |x| x.iter().map(|&v| v * v).sum::<f64>(),
            x0,
            None,
            CmaEsConfig {
                max_fevals: 10_000,
                initial_sigma: 1.0,
                xtol: 1e-5,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(result.fun < 1e-4, "f* = {}, expected < 1e-4", result.fun);
    }

    /// Rosenbrock valley: a non-convex problem requiring covariance
    /// adaptation to follow the curved ridge toward (1, 1).
    #[test]
    fn test_cmaes_rosenbrock() {
        let result = minimize_cmaes(
            |x| {
                let a = 1.0 - x[0];
                let b = x[1] - x[0] * x[0];
                a * a + 100.0 * b * b
            },
            array![0.0, 0.0],
            None,
            CmaEsConfig {
                max_fevals: 20_000,
                initial_sigma: 0.5,
                xtol: 1e-8,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(
            result.fun < 0.01,
            "Rosenbrock f* = {}, expected < 0.01",
            result.fun
        );
    }

    /// Bounded sphere: the unconstrained optimum (0, 0) lies outside the
    /// box, so the result should land near the (1, 1) corner.
    #[test]
    fn test_cmaes_with_bounds() {
        let result = minimize_cmaes(
            |x| x[0] * x[0] + x[1] * x[1],
            array![3.0, 3.0],
            Some(&[(1.0, 5.0), (1.0, 5.0)]),
            CmaEsConfig {
                max_fevals: 5000,
                initial_sigma: 0.5,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(
            result.x[0] >= 0.9 && result.x[0] <= 2.0,
            "x[0] = {}, expected in [0.9, 2.0]",
            result.x[0]
        );
        assert!(
            result.x[1] >= 0.9 && result.x[1] <= 2.0,
            "x[1] = {}, expected in [0.9, 2.0]",
            result.x[1]
        );
    }

    /// The result bookkeeping fields should always be populated.
    #[test]
    fn test_cmaes_result_fevals_and_generations() {
        let result = minimize_cmaes(|x| x[0] * x[0], array![1.0], None, CmaEsConfig::default())
            .expect("cmaes failed");
        assert!(result.fevals > 0, "fevals should be > 0");
        assert!(result.generations > 0, "generations should be > 0");
        assert!(!result.message.is_empty(), "message should not be empty");
    }

    /// An empty starting point must be rejected with an error.
    #[test]
    fn test_cmaes_invalid_dimension() {
        use scirs2_core::ndarray::Array1;
        let empty: Array1<f64> = Array1::from(vec![]);
        let result = minimize_cmaes(
            |x| x.iter().map(|&v| v * v).sum(),
            empty,
            None,
            CmaEsConfig::default(),
        );
        assert!(result.is_err(), "empty input should return error");
    }

    /// A bounds slice shorter than x0 must be rejected with an error.
    #[test]
    fn test_cmaes_bounds_mismatch_error() {
        let result = minimize_cmaes(
            |x| x[0] * x[0],
            array![1.0, 2.0],
            Some(&[(0.0, 5.0)]),
            CmaEsConfig::default(),
        );
        assert!(result.is_err(), "bounds mismatch should return error");
    }

    /// An explicit population size should be honored and still converge.
    #[test]
    fn test_cmaes_population_size_override() {
        let result = minimize_cmaes(
            |x| (x[0] - 1.0).powi(2) + (x[1] + 1.0).powi(2),
            array![0.0, 0.0],
            None,
            CmaEsConfig {
                population_size: Some(20),
                max_fevals: 5000,
                initial_sigma: 0.8,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(result.fun < 0.01, "f* = {}", result.fun);
    }

    /// With a loose xtol and a large budget, the run should terminate
    /// via a convergence criterion rather than the evaluation budget.
    #[test]
    fn test_cmaes_sigma_convergence() {
        let result = minimize_cmaes(
            |x| x[0] * x[0],
            array![0.1],
            None,
            CmaEsConfig {
                max_fevals: 100_000,
                initial_sigma: 0.5,
                xtol: 1e-3,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(result.converged || result.fevals >= 100_000);
    }

    /// Quadratic with a cross term: exercises covariance adaptation to
    /// a correlated (rotated) landscape.
    #[test]
    fn test_cmaes_quadratic_with_correlation() {
        let result = minimize_cmaes(
            |x| {
                2.0 * x[0] * x[0] + 2.0 * x[0] * x[1] + 3.0 * x[1] * x[1]
            },
            array![3.0, -3.0],
            None,
            CmaEsConfig {
                max_fevals: 5000,
                initial_sigma: 1.0,
                xtol: 1e-7,
                ..Default::default()
            },
        )
        .expect("cmaes failed");
        assert!(
            result.fun < 1e-4,
            "Correlated quadratic f* = {}",
            result.fun
        );
    }
}