cmaes_lbfgsb/
cmaes.rs

1use rand::Rng;
2use rand::SeedableRng;
3use rand_distr::StandardNormal;
4use rand_pcg::Pcg64;
5use rayon::prelude::*;
6use std::cmp::Ordering;
7use std::f64;
8
9/// Output of the canonical CMA-ES optimization.
10pub struct CmaesResult {
11    /// Best (objective, parameters) found over the entire run (or across restarts).
12    pub best_solution: (f64, Vec<f64>),
13    /// Total number of generations performed (summed across all restarts).
14    pub generations_used: usize,
15    /// Reason for final termination.
16    pub termination_reason: String,
17    /// (Optional) The final population if needed.
18    pub final_population: Option<Vec<(f64, Vec<f64>)>>,
19}
20
21/// Configuration for canonical CMA-ES with evolution paths and rank updates.
22/// 
23/// This struct contains all parameters that control the behavior of the CMA-ES algorithm.
24/// Most parameters have sensible defaults and can be left as `None` to use automatic values.
25/// 
26/// # Basic Parameters
27/// 
28/// The most important parameters for typical usage are:
29/// - `population_size`: Controls exploration vs exploitation trade-off
30/// - `max_generations`: Maximum number of generations to run
31/// - `parallel_eval`: Enable parallel function evaluation
32/// - `verbosity`: Control output level
33/// 
34/// # Advanced Parameters
35/// 
36/// Advanced users can fine-tune the algorithm behavior using learning rates,
37/// restart strategies, and numerical precision settings.
38/// 
39/// # Example
40/// 
41/// ```rust
42/// use cmaes_lbfgsb::cmaes::CmaesCanonicalConfig;
43/// 
44/// // Basic configuration
45/// let config = CmaesCanonicalConfig {
46///     population_size: 20,
47///     max_generations: 1000,
48///     parallel_eval: true,
49///     verbosity: 1,
50///     ..Default::default()
51/// };
52/// 
53/// // Advanced configuration with restarts
54/// let advanced_config = CmaesCanonicalConfig {
55///     population_size: 50,
56///     max_generations: 500,
57///     ipop_restarts: 3,
58///     total_evals_budget: 100000,
59///     use_subrun_budgeting: true,
60///     ..Default::default()
61/// };
62/// ```
63pub struct CmaesCanonicalConfig {
64    /// Population size (number of candidate solutions per generation).
65    /// 
66    /// **Default**: 0 (automatic: 4 + 3⌊ln(n)⌋ where n is problem dimension)
67    /// 
68    /// **Typical range**: 10-100 for most problems
69    /// 
70    /// **Larger values**: Better global exploration, slower convergence, more robust
71    /// **Smaller values**: Faster convergence, risk of premature convergence
72    /// 
73    /// **Guidelines**:
74    /// - Easy problems (unimodal): Use smaller populations (10-20)
75    /// - Difficult problems (multimodal): Use larger populations (50-100+)
76    /// - High-dimensional problems: Consider automatic sizing
77    pub population_size: usize,
78
79    /// Maximum number of generations per run.
80    /// 
81    /// **Default**: 500
82    /// 
83    /// **Guidelines**:
84    /// - Simple problems: 100-500 generations usually sufficient
85    /// - Complex problems: 1000-5000+ generations may be needed
86    /// - Use with `total_evals_budget` for better control
87    /// 
88    /// If sub-run budgeting is disabled, each run uses this value for generations.
89    /// Otherwise, sub-runs can get smaller or larger generation budgets.
90    pub max_generations: usize,
91
92    /// Random seed for reproducible results.
93    /// 
94    /// **Default**: 42
95    /// 
96    /// Set to different values to get different random runs, or keep the same
97    /// for reproducible experiments.
98    pub seed: u64,
99
100    /// Learning rate for rank-one update of covariance matrix.
101    /// 
102    /// **Default**: None (automatic: 2/((n+1.3)² + μ_eff))
103    /// 
104    /// **Typical range**: 0.0001 - 0.1
105    /// 
106    /// Controls how quickly the algorithm adapts to the search direction.
107    /// Smaller values = more conservative adaptation.
108    pub c1: Option<f64>,
109
110    /// Learning rate for rank-μ update of covariance matrix.
111    /// 
112    /// **Default**: None (automatic: depends on μ_eff and problem dimension)
113    /// 
114    /// **Typical range**: 0.001 - 1.0
115    /// 
116    /// Controls how much the population covariance influences the search distribution.
117    /// Must be balanced with c1 to ensure proper covariance matrix updates.
118    pub c_mu: Option<f64>,
119
120    /// Learning rate for cumulation path for step-size control.
121    /// 
122    /// **Default**: None (automatic: (μ_eff + 2)/(n + μ_eff + 5))
123    /// 
124    /// **Typical range**: 0.1 - 1.0
125    /// 
126    /// Controls the step-size adaptation speed. Larger values lead to faster
127    /// step-size changes but may cause instability.
128    pub c_sigma: Option<f64>,
129
130    /// Damping parameter for step-size update.
131    /// 
132    /// **Default**: None (automatic: 1 + 2max(0, √((μ_eff-1)/(n+1)) - 1))
133    /// 
134    /// **Typical range**: 1.0 - 10.0
135    /// 
136    /// Controls the damping of step-size updates. Larger values = more conservative
137    /// step-size changes, which can improve stability but slow adaptation.
138    pub d_sigma: Option<f64>,
139
140    /// Enable parallel evaluation of candidate solutions.
141    /// 
142    /// **Default**: false
143    /// 
144    /// When true, the population is evaluated in parallel using Rayon.
145    /// Recommended for expensive objective functions. Disable for very fast
146    /// functions where parallelization overhead exceeds benefits.
147    pub parallel_eval: bool,
148
149    /// Verbosity level for progress output.
150    /// 
151    /// **Levels**:
152    /// - 0: Silent (no output)
153    /// - 1: Basic progress (every 10 generations)
154    /// - 2: Detailed debug information
155    /// 
156    /// **Default**: 0
157    pub verbosity: u8,
158
159    /// Number of IPOP (Increasing Population) restarts.
160    /// 
161    /// **Default**: 0 (no IPOP restarts)
162    /// 
163    /// **Typical range**: 0-5 restarts
164    /// 
165    /// IPOP restarts the algorithm with increasing population sizes when
166    /// it gets stuck. Each restart doubles the population size by default.
167    /// Effective for multimodal problems but increases computational cost.
168    /// 
169    /// **Note**: BIPOP overrides IPOP if both are > 0.
170    pub ipop_restarts: usize,
171
172    /// Factor by which population size is multiplied each IPOP restart.
173    /// 
174    /// **Default**: 2.0
175    /// 
176    /// **Typical range**: 1.5 - 3.0
177    /// 
178    /// Controls how aggressively the population size grows with each restart.
179    /// Larger factors provide better exploration but increase cost exponentially.
180    pub ipop_increase_factor: f64,
181
182    /// Number of BIPOP (Bi-Population) restarts.
183    /// 
184    /// **Default**: 0 (no BIPOP restarts)
185    /// 
186    /// **Typical range**: 0-10 restarts
187    /// 
188    /// BIPOP alternates between small and large population runs. More sophisticated
189    /// than IPOP and often more effective for difficult multimodal problems.
190    /// 
191    /// **Note**: BIPOP overrides IPOP if both are > 0.
192    pub bipop_restarts: usize,
193
194    /// Total function-evaluations budget across all runs.
195    /// 
196    /// **Default**: 0 (no budget limit)
197    /// 
198    /// **Typical values**: 10,000 - 1,000,000 depending on problem complexity
199    /// 
200    /// When combined with `use_subrun_budgeting`, this budget is intelligently
201    /// allocated across multiple restart runs.
202    pub total_evals_budget: usize,
203
204    /// Enable advanced sub-run budgeting logic for IPOP/BIPOP.
205    /// 
206    /// **Default**: false
207    /// 
208    /// When true, the `total_evals_budget` is strategically allocated across
209    /// restart runs rather than running each to completion. This often provides
210    /// better results within a fixed computational budget.
211    pub use_subrun_budgeting: bool,
212
213    /// Alpha parameter used in c_mu calculation.
214    /// 
215    /// **Default**: Some(2.0)
216    /// 
217    /// **Typical range**: 1.0 - 4.0
218    /// 
219    /// Advanced parameter that influences the balance between rank-1 and rank-μ
220    /// updates of the covariance matrix. Rarely needs adjustment.
221    pub alpha_mu: Option<f64>,
222
223    /// Threshold factor for the evolution path test in step-size control.
224    /// 
225    /// **Default**: Some(1.4)
226    /// 
227    /// **Typical range**: 1.0 - 2.0
228    /// 
229    /// Controls when to halt the cumulation of the evolution path based on
230    /// its length. Affects the balance between exploration and exploitation.
231    pub hsig_threshold_factor: Option<f64>,
232
233    /// Factor for small population size calculation in BIPOP.
234    /// 
235    /// **Default**: Some(0.5)
236    /// 
237    /// **Typical range**: 0.1 - 0.8
238    /// 
239    /// In BIPOP, determines the size of "small" population runs relative to
240    /// the baseline population size. Smaller values = more focused local search.
241    pub bipop_small_population_factor: Option<f64>,
242
243    /// Budget allocation factor for small BIPOP runs.
244    /// 
245    /// **Default**: Some(1.0)
246    /// 
247    /// **Typical range**: 0.5 - 2.0
248    /// 
249    /// Controls how much of the evaluation budget is allocated to small population
250    /// runs in BIPOP. Values > 1.0 give more budget to exploitation phases.
251    pub bipop_small_budget_factor: Option<f64>,
252
253    /// Budget allocation factor for large BIPOP runs.
254    /// 
255    /// **Default**: Some(3.0)
256    /// 
257    /// **Typical range**: 1.0 - 5.0
258    /// 
259    /// Controls how much of the evaluation budget is allocated to large population
260    /// runs in BIPOP. Values > 1.0 give more budget to exploration phases.
261    pub bipop_large_budget_factor: Option<f64>,
262
263    /// Factor for large population size increase in BIPOP.
264    /// 
265    /// **Default**: Some(2.0)
266    /// 
267    /// **Typical range**: 1.5 - 3.0
268    /// 
269    /// Controls how the large population size grows with each BIPOP restart.
270    /// Similar to `ipop_increase_factor` but for BIPOP large runs.
271    pub bipop_large_pop_increase_factor: Option<f64>,
272
273    /// Maximum number of iterations for bounds mirroring.
274    /// 
275    /// **Default**: Some(8)
276    /// 
277    /// **Typical range**: 5 - 20
278    /// 
279    /// When candidates violate bounds, they are "mirrored" back into the feasible
280    /// region. This parameter limits how many mirror operations are performed
281    /// before clamping to bounds.
282    pub max_bound_iterations: Option<usize>,
283
284    /// Numerical precision threshold for eigendecomposition convergence.
285    /// 
286    /// **Default**: Some(1e-15)
287    /// 
288    /// **Typical range**: 1e-20 to 1e-10
289    /// 
290    /// Controls the precision of the eigendecomposition of the covariance matrix.
291    /// Smaller values = higher precision but potentially slower computation.
292    pub eig_precision_threshold: Option<f64>,
293
294    /// Minimum threshold for covariance matrix eigenvalues.
295    /// 
296    /// **Default**: Some(1e-15)
297    /// 
298    /// **Typical range**: 1e-20 to 1e-10
299    /// 
300    /// Prevents numerical issues by ensuring eigenvalues don't become too small.
301    /// Smaller values allow more aggressive adaptation but risk numerical instability.
302    pub min_eig_value: Option<f64>,
303
304    /// Minimum threshold for matrix operations.
305    /// 
306    /// **Default**: Some(1e-20)
307    /// 
308    /// **Typical range**: 1e-25 to 1e-15
309    /// 
310    /// General numerical threshold for matrix computations to prevent
311    /// underflow and maintain numerical stability.
312    pub matrix_op_threshold: Option<f64>,
313
314    /// Maximum stagnation generations before termination.
315    /// 
316    /// **Default**: Some(200)
317    /// 
318    /// **Typical range**: 50 - 1000
319    /// 
320    /// Algorithm terminates if no improvement is seen for this many consecutive
321    /// generations. Prevents infinite runs on problems where the optimum has
322    /// been reached within tolerance.
323    pub stagnation_limit: Option<usize>,
324
325    /// Minimum sigma value to prevent numerical issues.
326    /// 
327    /// **Default**: Some(1e-8)
328    /// 
329    /// **Typical range**: 1e-12 to 1e-6
330    /// 
331    /// Prevents the step-size from becoming so small that progress stops due
332    /// to numerical precision limits. Should be much smaller than expected
333    /// parameter scales.
334    pub min_sigma: Option<f64>,
335}
336
337impl Default for CmaesCanonicalConfig {
338    fn default() -> Self {
339        Self {
340            population_size: 0,
341            max_generations: 500,
342            seed: 42,
343            c1: None,
344            c_mu: None,
345            c_sigma: None,
346            d_sigma: None,
347            parallel_eval: false,
348            verbosity: 0,
349            ipop_restarts: 0,
350            ipop_increase_factor: 2.0,
351            bipop_restarts: 0,
352            total_evals_budget: 0,
353            use_subrun_budgeting: false,
354            alpha_mu: Some(2.0),
355            hsig_threshold_factor: Some(1.4),
356            bipop_small_population_factor: Some(0.5),
357            bipop_small_budget_factor: Some(1.0),
358            bipop_large_budget_factor: Some(3.0),
359            bipop_large_pop_increase_factor: Some(2.0),
360            max_bound_iterations: Some(8),
361            eig_precision_threshold: Some(1e-15),
362            min_eig_value: Some(1e-15),
363            matrix_op_threshold: Some(1e-20),
364            stagnation_limit: Some(200),
365            min_sigma: Some(1e-8),
366        }
367    }
368}
369
370/// Internal struct carrying the CMA-ES "state" so we can continue from one sub-run to the next.
371pub struct CmaesIntermediateState {
372    pub mean: Vec<f64>,
373    pub sigma: f64,
374    pub cov_eigvals: Vec<f64>,
375    pub cov_eigvecs: Vec<Vec<f64>>,
376    pub p_c: Vec<f64>,
377    pub p_s: Vec<f64>,
378    pub best_obj: f64,
379    pub best_params: Vec<f64>,
380}
381
382/// Mirror reflection for bounds.
383fn mirror_bound(mut x: f64, lb: f64, ub: f64, max_iterations: usize) -> f64 {
384    let _width = ub - lb;
385    let mut iters = 0;
386    while (x < lb || x > ub) && iters < max_iterations {
387        if x < lb {
388            x = lb + (lb - x);
389        } else if x > ub {
390            x = ub - (x - ub);
391        }
392        iters += 1;
393    }
394    if x < lb {
395        x = lb;
396    }
397    if x > ub {
398        x = ub;
399    }
400    x
401}
402
403/// Compute CMA-ES weights.
404#[allow(clippy::needless_range_loop)]
405fn compute_weights(lambda: usize) -> (Vec<f64>, usize) {
406    let mu = lambda / 2;
407    let mut weights = vec![0.0; mu];
408    let mut sum_w = 0.0;
409    for i in 0..mu {
410        let val = (mu as f64 + 0.5).ln() - ((i + 1) as f64).ln();
411        weights[i] = val;
412        sum_w += val;
413    }
414    for w in weights.iter_mut() {
415        *w /= sum_w;
416    }
417    (weights, mu)
418}
419
420/// Naive Jacobi or similar approach for a real-symmetric matrix.
421#[allow(clippy::needless_range_loop)]
422fn eigendecompose_symmetric(cov: &[Vec<f64>], eps: f64, apq_threshold: f64) -> Result<(Vec<Vec<f64>>, Vec<f64>), String> {
423    let n = cov.len();
424    let mut mat = cov.to_vec();
425    let mut eigvecs = vec![vec![0.0; n]; n];
426    for i in 0..n {
427        eigvecs[i][i] = 1.0;
428    }
429    let mut changed = true;
430    let max_iter = 64 * n;
431
432    for _iter in 0..max_iter {
433        if !changed {
434            break;
435        }
436        changed = false;
437        for p in 0..n {
438            for q in (p + 1)..n {
439                let apq = mat[p][q];
440                if apq.abs() < apq_threshold {
441                    continue;
442                }
443                let app = mat[p][p];
444                let aqq = mat[q][q];
445                let theta = 0.5 * (aqq - app) / apq;
446                let t = if theta.abs() > 1e6 {
447                    0.5 / theta
448                } else {
449                    1.0 / (theta.abs() + (theta * theta + 1.0).sqrt())
450                };
451                let t = if theta < 0.0 { -t } else { t };
452                let c = 1.0 / (1.0 + t * t).sqrt();
453                let s = t * c;
454                let tau = s / (1.0 + c);
455
456                mat[p][p] = app - t * apq;
457                mat[q][q] = aqq + t * apq;
458                mat[p][q] = 0.0;
459                mat[q][p] = 0.0;
460
461                for r in 0..p {
462                    let arp = mat[r][p];
463                    let arq = mat[r][q];
464                    mat[r][p] = arp - s * (arq + tau * arp);
465                    mat[p][r] = mat[r][p];
466                    mat[r][q] = arq + s * (arp - tau * arq);
467                    mat[q][r] = mat[r][q];
468                }
469                for r in (p + 1)..q {
470                    let apr = mat[p][r];
471                    let arq = mat[r][q];
472                    mat[p][r] = apr - s * (arq + tau * apr);
473                    mat[r][p] = mat[p][r];
474                    mat[r][q] = arq + s * (apr - tau * arq);
475                    mat[q][r] = mat[r][q];
476                }
477                for r in (q + 1)..n {
478                    let apr = mat[p][r];
479                    let aqr = mat[q][r];
480                    mat[p][r] = apr - s * (aqr + tau * apr);
481                    mat[r][p] = mat[p][r];
482                    mat[q][r] = aqr + s * (apr - tau * aqr);
483                    mat[r][q] = mat[q][r];
484                }
485
486                for r in 0..n {
487                    let vrp = eigvecs[r][p];
488                    let vrq = eigvecs[r][q];
489                    eigvecs[r][p] = vrp - s * (vrq + tau * vrp);
490                    eigvecs[r][q] = vrq + s * (vrp - tau * vrq);
491                }
492                changed = true;
493            }
494        }
495    }
496
497    let mut diag = vec![0.0; n];
498    for i in 0..n {
499        diag[i] = mat[i][i];
500        if diag[i] < eps {
501            diag[i] = eps;
502        }
503    }
504    Ok((eigvecs, diag))
505}
506
507#[allow(clippy::needless_range_loop)]
508fn multiply_mat_vec(m: &[Vec<f64>], v: &[f64], scale: f64) -> Vec<f64> {
509    let n = m.len();
510    let mut out = vec![0.0; n];
511    for i in 0..n {
512        let mut sum = 0.0;
513        for j in 0..n {
514            sum += m[i][j] * v[j];
515        }
516        out[i] = scale * sum;
517    }
518    out
519}
520
521#[allow(clippy::needless_range_loop)]
522fn outer(v: &[f64], w: &[f64]) -> Vec<Vec<f64>> {
523    let n = v.len();
524    let mut mat = vec![vec![0.0; n]; n];
525    for i in 0..n {
526        for j in 0..n {
527            mat[i][j] = v[i] * w[j];
528        }
529    }
530    mat
531}
532
533/// Multiply matrix by scalar in-place.
534fn scale_mat(a: &mut [Vec<f64>], alpha: f64) {
535    for row in a.iter_mut() {
536        for val in row.iter_mut() {
537            *val *= alpha;
538        }
539    }
540}
541
542/// Rebuild full covariance from B and D (diagonal).
543#[allow(clippy::needless_range_loop)]
544fn recompute_cov(b: &[Vec<f64>], d: &[f64]) -> Vec<Vec<f64>> {
545    let n = b.len();
546    let mut bd = vec![vec![0.0; n]; n];
547    for i in 0..n {
548        for j in 0..n {
549            bd[i][j] = b[i][j] * d[j];
550        }
551    }
552    let mut out = vec![vec![0.0; n]; n];
553    for i in 0..n {
554        for j in 0..n {
555            let mut sum = 0.0;
556            for k in 0..n {
557                sum += bd[i][k] * b[j][k];
558            }
559            out[i][j] = sum;
560        }
561    }
562    out
563}
564
565/// Inverse of B*D^0.5 for sampling or z-computation.
566#[allow(clippy::needless_range_loop)]
567fn invert_b_d(b: &[Vec<f64>], d: &[f64]) -> Vec<Vec<f64>> {
568    let n = b.len();
569    let mut out = vec![vec![0.0; n]; n];
570    for i in 0..n {
571        let inv_sd = 1.0 / (d[i].sqrt());
572        for j in 0..n {
573            out[j][i] = b[i][j] * inv_sd;
574        }
575    }
576    out
577}
578
579/// Approx chi(dim).
580fn chi_dim(n: f64) -> f64 {
581    n.sqrt() * (1.0 - 1.0 / (4.0 * n) + 1.0 / (21.0 * n * n))
582}
583
584/// Initializes a brand-new CMA-ES state if none is provided.
585#[allow(clippy::needless_range_loop)]
586fn init_cmaes_state(
587    bounds: &[(f64, f64)],
588    config: &CmaesCanonicalConfig,
589) -> CmaesIntermediateState {
590    let dim = bounds.len();
591    // Mean is midpoint of bounds
592    let mut mean = vec![0.0; dim];
593    for (i, &(lb, ub)) in bounds.iter().enumerate() {
594        mean[i] = 0.5 * (lb + ub);
595    }
596
597    // Sigma ~ 1/4 avg range
598    let mut avg_range = 0.0;
599    for &(lb, ub) in bounds {
600        avg_range += (ub - lb).abs();
601    }
602    avg_range /= dim as f64;
603    let sigma = 0.25 * avg_range.max(config.min_sigma.unwrap_or(1e-8));
604
605    // Identity for covariance
606    let mut cov_eigvecs = vec![vec![0.0; dim]; dim];
607    for i in 0..dim {
608        cov_eigvecs[i][i] = 1.0;
609    }
610    let cov_eigvals = vec![1.0; dim];
611    let p_c = vec![0.0; dim];
612    let p_s = vec![0.0; dim];
613
614    CmaesIntermediateState {
615        mean,
616        sigma,
617        cov_eigvals,
618        cov_eigvecs,
619        p_c,
620        p_s,
621        best_obj: f64::INFINITY,
622        best_params: vec![],
623    }
624}
625
626/// Runs one CMA-ES sub-run, continuing from an existing state (if given).
627/// 
628/// - `state_in`: if Some, we continue from that state; else initialize fresh
629/// - `best_obj_in`: the global best objective so far (to handle termination or merges)
630/// - `evals_limit`: 0 => no limit, else stop if we exceed it
631/// 
632/// Returns:
633/// - `CmaesResult` with partial info
634/// - Updated state (mean, sigma, B, D, p_c, p_s, etc.)
635/// - Boolean indicating if we improved the best objective in this run
636/// - The number of function evaluations used in this sub-run
637#[allow(clippy::too_many_arguments)]
638fn cmaes_one_run<F>(
639    objective: &F,
640    bounds: &[(f64, f64)],
641    config: &CmaesCanonicalConfig,
642    state_in: Option<CmaesIntermediateState>,
643    best_obj_in: f64,
644    rng_seed: u64,
645    evals_limit: usize,
646    initial_mean: Option<&[f64]>,
647
648) -> (CmaesResult, CmaesIntermediateState, bool, usize)
649where
650    F: Fn(&[f64]) -> f64 + Sync + Send,
651{
652    let dim = bounds.len();
653    let mut rng = Pcg64::seed_from_u64(rng_seed);
654
655    // CMA-ES parameters
656    let lambda = if config.population_size == 0 {
657        4 + (3.0 * (dim as f64).ln()) as usize
658    } else {
659        config.population_size
660    };
661    let (weights, mu) = compute_weights(lambda);
662    let mu_eff = 1.0 / weights.iter().map(|w| w.powi(2)).sum::<f64>();
663
664    // Learning rates
665    let c_sigma = config
666        .c_sigma
667        .unwrap_or_else(|| (mu_eff + 2.0) / (dim as f64 + mu_eff + 5.0));
668    let d_sigma = config
669        .d_sigma
670        .unwrap_or_else(|| 1.0 + 2.0_f64.max(((mu_eff - 1.0) / (dim as f64)).sqrt()));
671    let c_c = (4.0 + mu_eff / dim as f64) / (dim as f64 + 4.0 + 2.0 * mu_eff / dim as f64);
672    let c1 = config
673        .c1
674        .unwrap_or_else(|| 2.0 / ((dim as f64 + 1.3).powi(2) + mu_eff));
675    let alpha_mu = config.alpha_mu.unwrap_or(2.0);
676    let c_mu = config.c_mu.unwrap_or_else(|| {
677        alpha_mu
678            * ((mu_eff - 2.0 + 1.0 / mu_eff)
679                / ((dim as f64 + 2.0).powi(2) + alpha_mu * mu_eff / 2.0))
680    });
681
682    // Possibly continue from existing state
683    let mut st = match state_in {
684        Some(s) => s,
685        None => {
686            let mut fresh = init_cmaes_state(bounds, config);
687    
688            // If user provided an initial guess, override the fresh state's mean
689            if let Some(guess) = initial_mean {
690                // Reflect/Clamp each dimension into [lb, ub]
691                for (i, &(lb, ub)) in bounds.iter().enumerate() {
692                    let g = guess[i];
693                    fresh.mean[i] = mirror_bound(g, lb, ub, config.max_bound_iterations.unwrap_or(8));
694                }
695            }
696    
697            fresh
698        }
699    };
700
701    // If the global best is better than the state's best, override the state's mean
702    if best_obj_in < st.best_obj {
703        st.best_obj = best_obj_in;
704        // We do *not* forcibly override the distribution if the user wants synergy with the old cov,
705        // but let's set the mean to the known best. This ensures we shift near the best region.
706        st.mean = st.best_params.clone();
707    }
708
709    // If the user never updated st.best_params but best_obj_in is from outside,
710    // we can forcibly store an empty state param => it won't override. So let's do an extra check:
711    if st.best_params.is_empty() && best_obj_in < f64::INFINITY {
712        // We don't know the param that gave best_obj_in, so we can't override the mean in that scenario.
713    }
714
715    let mut best_obj = st.best_obj;
716    let mut best_params = st.best_params.clone();
717    let mut mean = st.mean.clone();
718    let mut sigma = st.sigma;
719    let mut cov_eigvals = st.cov_eigvals.clone();
720    let mut cov_eigvecs = st.cov_eigvecs.clone();
721    let mut p_c = st.p_c.clone();
722    let mut p_s = st.p_s.clone();
723
724    let mut evals_used = 0usize;
725    let max_generations = config.max_generations;
726    let stagnation_limit = config.stagnation_limit.unwrap_or(200);
727
728    let mut no_improvement_count = 0usize;
729    let mut improved_flag = false;
730    let mut generation = 0usize;
731    let mut termination_reason = String::new();
732
733    if config.verbosity >= 2 {
734        eprintln!(
735            "[DEBUG] Single CMA-ES run: pop_size={}, seed={}, evals_limit={}",
736            lambda, rng_seed, evals_limit
737        );
738    }
739
740    while generation < max_generations {
741        // If we have an eval limit > 0 and used_evals >= limit, break
742        if evals_limit > 0 && evals_used >= evals_limit {
743            termination_reason = format!("Reached sub-run eval limit ({} evals)", evals_limit);
744            break;
745        }
746
747        // Sample population
748        let b = &cov_eigvecs;
749        let d = &cov_eigvals;
750        let candidates: Vec<Vec<f64>> = (0..lambda)
751            .map(|_| {
752                let mut z = vec![0.0; dim];
753                #[allow(clippy::needless_range_loop)]
754                for i in 0..dim {
755                    z[i] = rng.sample(StandardNormal);
756                }
757                let mut yz = vec![0.0; dim];
758                for i in 0..dim {
759                    let sqrt_d_i = d[i].sqrt();
760                    let mut sum = 0.0;
761                    for j in 0..dim {
762                        sum += b[j][i] * z[j];
763                    }
764                    yz[i] = sqrt_d_i * sum;
765                }
766                let mut candidate = vec![0.0; dim];
767                for i in 0..dim {
768                    let raw = mean[i] + sigma * yz[i];
769                    candidate[i] = mirror_bound(raw, bounds[i].0, bounds[i].1, config.max_bound_iterations.unwrap_or(8));
770                }
771                candidate
772            })
773            .collect();
774
775        // Evaluate
776        let evals: Vec<f64> = if config.parallel_eval {
777            candidates.par_iter().map(|c| objective(c)).collect()
778        } else {
779            candidates.iter().map(|c| objective(c)).collect()
780        };
781        evals_used += candidates.len();
782
783        if evals_limit > 0 && evals_used > evals_limit {
784            termination_reason = format!(
785                "Exceeded sub-run eval limit: used {}, limit={}",
786                evals_used, evals_limit
787            );
788        }
789
790        let mut population = Vec::with_capacity(lambda);
791        for (c, fval) in candidates.into_iter().zip(evals.into_iter()) {
792            population.push((fval, c));
793        }
794        population.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
795
796        // Update best
797        if population[0].0 < best_obj {
798            best_obj = population[0].0;
799            best_params = population[0].1.clone();
800            no_improvement_count = 0;
801            improved_flag = true;
802        } else {
803            no_improvement_count += 1;
804        }
805
806        // Weighted recombination
807        let mut new_mean = vec![0.0; dim];
808        // mu can be up to population.len(), just in case
809        let real_mu = mu.min(population.len());
810        for (i, (_, p)) in population.iter().take(real_mu).enumerate() {
811            let w = weights[i];
812            for d_ in 0..dim {
813                new_mean[d_] += w * p[d_];
814            }
815        }
816
817        // Construct z_k
818        let inv_bd = invert_b_d(b, d);
819        let mut y_wsum = vec![0.0; dim];
820        for (i, (_, p)) in population.iter().take(real_mu).enumerate() {
821            let w = weights[i];
822            let mut diff = vec![0.0; dim];
823            for d_ in 0..dim {
824                diff[d_] = p[d_] - mean[d_];
825            }
826            let z_k = multiply_mat_vec(&inv_bd, &diff, 1.0 / sigma);
827            for d_ in 0..dim {
828                y_wsum[d_] += w * z_k[d_];
829            }
830        }
831
832        // Update p_s
833        p_s = p_s
834            .iter()
835            .zip(y_wsum.iter())
836            .map(|(ps_i, &y)| (1.0 - c_sigma) * ps_i + ((c_sigma * (2.0 - c_sigma) * mu_eff).sqrt()) * y)
837            .collect();
838
839        let ps_norm = p_s.iter().map(|v| v * v).sum::<f64>().sqrt();
840        sigma *= f64::exp((c_sigma / d_sigma) * (ps_norm / chi_dim(dim as f64) - 1.0));
841
842        // Update p_c
843        let hsig = if ps_norm
844            / (1.0 - (1.0 - c_sigma).powi(2 * (generation + 1) as i32)).sqrt()
845            < (config.hsig_threshold_factor.unwrap_or(1.4) + 2.0 / (dim as f64 + 1.0)) * chi_dim(dim as f64)
846        {
847            1.0
848        } else {
849            0.0
850        };
851        for d_ in 0..dim {
852            p_c[d_] = (1.0 - c_c) * p_c[d_] + hsig * ((c_c * (2.0 - c_c) * mu_eff).sqrt()) * y_wsum[d_];
853        }
854
855        // Covariance updates
856        let mut rank1 = outer(&p_c, &p_c);
857        scale_mat(&mut rank1, c1 * hsig);
858
859        let mut rank_mu = vec![vec![0.0; dim]; dim];
860        for (i, (_, p)) in population.iter().take(real_mu).enumerate() {
861            let w = weights[i];
862            let mut diff = vec![0.0; dim];
863            for d_ in 0..dim {
864                diff[d_] = p[d_] - mean[d_];
865            }
866            let z_k = multiply_mat_vec(&inv_bd, &diff, 1.0 / sigma);
867            let o = outer(&z_k, &z_k);
868            for r in 0..dim {
869                for c in 0..dim {
870                    rank_mu[r][c] += w * o[r][c];
871                }
872            }
873        }
874        scale_mat(&mut rank_mu, c_mu);
875
876        let c_mat = recompute_cov(b, d);
877        let mut updated_c = c_mat;
878        for r in 0..dim {
879            for c in 0..dim {
880                updated_c[r][c] = (1.0 - c1 - c_mu) * updated_c[r][c] + rank1[r][c] + rank_mu[r][c];
881            }
882        }
883
884        match eigendecompose_symmetric(&updated_c, config.eig_precision_threshold.unwrap_or(1e-15), config.matrix_op_threshold.unwrap_or(1e-20)) {
885            Ok((new_b, new_d)) => {
886                cov_eigvecs = new_b;
887                cov_eigvals = new_d;
888            }
889            Err(e) => {
890                if config.verbosity > 0 {
891                    eprintln!("Warning: decomposition failed at gen {}: {}", generation, e);
892                }
893            }
894        }
895
896        mean = new_mean;
897
898        if no_improvement_count > stagnation_limit {
899            termination_reason = format!("No improvement for {} consecutive generations", stagnation_limit);
900            break;
901        }
902
903        generation += 1;
904
905        if config.verbosity > 0 && (generation % 10 == 0 || generation == max_generations) {
906            println!(
907                "[CMA-ES Gen {}] best={:.8}, sigma={:.5}, no_improv={}, evals_used={}",
908                generation, best_obj, sigma, no_improvement_count, evals_used
909            );
910        }
911
912        if !termination_reason.is_empty() {
913            // e.g. we set it above for eval-limit reasons
914            break;
915        }
916    }
917
918    if termination_reason.is_empty() {
919        termination_reason = format!("Max generations {} reached", max_generations);
920    }
921
922    // Update the state for next run
923    st.mean = mean;
924    st.sigma = sigma;
925    st.cov_eigvals = cov_eigvals;
926    st.cov_eigvecs = cov_eigvecs;
927    st.p_c = p_c;
928    st.p_s = p_s;
929    st.best_obj = best_obj;
930    st.best_params = best_params.clone();
931
932    let result = CmaesResult {
933        best_solution: (best_obj, best_params),
934        generations_used: generation,
935        termination_reason,
936        final_population: None, // if needed, store population
937    };
938
939    (result, st, improved_flag, evals_used)
940}
941
942/// Main CMA-ES function.
943pub fn canonical_cmaes_optimize<F>(
944    objective: F,
945    bounds: &[(f64, f64)],
946    config: CmaesCanonicalConfig,
947    initial_mean: Option<Vec<f64>>,
948
949) -> CmaesResult
950where
951    F: Fn(&[f64]) -> f64 + Sync + Send,
952{
953    // If no restarts, just do a single run from scratch
954    if config.ipop_restarts == 0 && config.bipop_restarts == 0 {
955        let (res, _, _, _) = cmaes_one_run(
956            &objective,
957            bounds,
958            &config,
959            None,              // no prior state
960            f64::INFINITY,     // best_obj_in
961            config.seed,
962            0,                 // no eval limit
963            initial_mean.as_deref(), // pass the user guess as Option<&[f64]>
964        );
965        return res;
966    }
967
968    // BIPOP takes precedence
969    if config.bipop_restarts > 0 {
970        return run_bipop(&objective, bounds, config, initial_mean);
971    }
972
973    // Otherwise IPOP
974    run_ipop(&objective, bounds, config, initial_mean)
975}
976
977/// IPOP with advanced state reuse.
978fn run_ipop<F>(
979    objective: &F,
980    bounds: &[(f64, f64)],
981    mut config: CmaesCanonicalConfig,
982    initial_mean: Option<Vec<f64>>,
983
984) -> CmaesResult
985where
986    F: Fn(&[f64]) -> f64 + Sync + Send,
987{
988    let restarts = config.ipop_restarts;
989    let mut global_best = f64::INFINITY;
990    let mut global_params = vec![];
991    let mut final_pop = None;
992    let mut final_reason = String::new();
993    let mut total_gens_used = 0usize;
994
995    // Keep track of total evals used
996    let mut total_evals_used = 0usize;
997    let total_budget = config.total_evals_budget;
998
999    // Our intermediate state:
1000    let mut cmaes_state: Option<CmaesIntermediateState> = None;
1001
1002    for restart_i in 0..=restarts {
1003        // Decide sub-run eval limit if sub-run budgeting is on
1004        let leftover = total_budget.saturating_sub(total_evals_used);
1005        let eval_limit_this_run = if config.use_subrun_budgeting && leftover > 0 {
1006            // We'll do a simple scheme: each run i gets fraction ~ (restarts - i + 1).
1007            // The real approach can be more refined. Here we clamp to leftover strictly.
1008            let runs_left = (restarts - restart_i) + 1;
1009            let fraction = runs_left as f64 / (restarts + 1) as f64;
1010            let chunk = (fraction * leftover as f64).round() as usize;
1011            chunk.min(leftover)
1012        } else {
1013            0 // means no limit
1014        };
1015
1016        // Perform one run
1017        let (res, state_out, _improved, used_evals) = cmaes_one_run(
1018            objective,
1019            bounds,
1020            &config,
1021            cmaes_state.take(), // pass the old state to continue distribution
1022            global_best,
1023            config.seed + restart_i as u64,
1024            eval_limit_this_run,
1025            if restart_i == 0 { initial_mean.as_deref() } else { None },
1026
1027        );
1028
1029        // Update total usage, final state, final reason, etc.
1030        total_evals_used += used_evals;
1031        total_gens_used += res.generations_used;
1032        final_pop = res.final_population;
1033        final_reason = res.termination_reason.clone();
1034
1035        // If improved, update global best
1036        if res.best_solution.0 < global_best {
1037            global_best = res.best_solution.0;
1038            global_params = res.best_solution.1.clone();
1039        }
1040
1041        // Keep the new state for next iteration
1042        cmaes_state = Some(state_out);
1043
1044        // Possibly break if we exhausted the entire budget
1045        if config.use_subrun_budgeting && total_budget > 0 && total_evals_used >= total_budget {
1046            if config.verbosity >= 1 {
1047                eprintln!(
1048                    "IPOP early stop: total_evals_used={} >= total_evals_budget={}",
1049                    total_evals_used, total_budget
1050                );
1051            }
1052            break;
1053        }
1054
1055        // If more restarts remain, grow the population size
1056        if restart_i < restarts {
1057            config.population_size =
1058                (config.population_size as f64 * config.ipop_increase_factor).round() as usize;
1059            if config.verbosity >= 2 {
1060                eprintln!(
1061                    "[DEBUG] IPOP restart {} => new pop_size={}, total_evals_used={}",
1062                    restart_i + 1,
1063                    config.population_size,
1064                    total_evals_used
1065                );
1066            }
1067        }
1068    }
1069
1070    CmaesResult {
1071        best_solution: (global_best, global_params),
1072        generations_used: total_gens_used,
1073        termination_reason: format!(
1074            "{} (IPOP restarts used: {})",
1075            final_reason, config.ipop_restarts
1076        ),
1077        final_population: final_pop,
1078    }
1079}
1080
1081/// BIPOP with advanced state reuse.
1082fn run_bipop<F>(
1083    objective: &F,
1084    bounds: &[(f64, f64)],
1085    mut config: CmaesCanonicalConfig,
1086    initial_mean: Option<Vec<f64>>,
1087
1088) -> CmaesResult
1089where
1090    F: Fn(&[f64]) -> f64 + Sync + Send,
1091{
1092    let restarts = config.bipop_restarts;
1093    let dim = bounds.len();
1094    let baseline_pop = if config.population_size == 0 {
1095        4 + (3.0 * (dim as f64).ln()) as usize
1096    } else {
1097        config.population_size
1098    };
1099
1100    let mut global_best = f64::INFINITY;
1101    let mut global_params = vec![];
1102    let mut final_pop = None;
1103    let mut final_reason = String::new();
1104    let mut total_gens_used = 0;
1105
1106    let mut total_evals_used = 0usize;
1107    let total_budget = config.total_evals_budget;
1108
1109    // Start with no distribution (None => init on first run)
1110    let mut cmaes_state: Option<CmaesIntermediateState> = None;
1111
1112    let mut large_pop = baseline_pop;
1113    let small_pop = (baseline_pop as f64 * config.bipop_small_population_factor.unwrap_or(0.5)).ceil() as usize;
1114
1115    for restart_i in 0..=restarts {
1116        let use_small = (restart_i % 2) == 0;
1117        let pop_size = if use_small { small_pop } else { large_pop };
1118
1119        let leftover = total_budget.saturating_sub(total_evals_used);
1120
1121        // For BIPOP, let's do a different fraction for large vs. small.
1122        let eval_limit_this_run = if config.use_subrun_budgeting && leftover > 0 {
1123            let factor = if use_small { config.bipop_small_budget_factor.unwrap_or(1.0) } else { config.bipop_large_budget_factor.unwrap_or(3.0) };
1124            // A simple approach: fraction = factor / (restarts - restart_i + 1)
1125            // Then clamp.
1126            let runs_left = (restarts - restart_i) + 1;
1127            let chunk = ((factor / runs_left as f64) * leftover as f64).round() as usize;
1128            chunk.min(leftover)
1129        } else {
1130            0
1131        };
1132
1133        if config.verbosity >= 2 {
1134            eprintln!(
1135                "[DEBUG] BIPOP restart {} => pop_size={}, small? {}, leftover_budget={}, evals_limit={}",
1136                restart_i, pop_size, use_small, leftover, eval_limit_this_run
1137            );
1138        }
1139
1140        // Temporarily override config's population_size
1141        let old_pop = config.population_size;
1142        config.population_size = pop_size;
1143
1144        let (res, state_out, _improved, used_evals) = cmaes_one_run(
1145            objective,
1146            bounds,
1147            &config,
1148            cmaes_state.take(),
1149            global_best,
1150            config.seed + restart_i as u64,
1151            eval_limit_this_run,
1152            if restart_i == 0 { initial_mean.as_deref() } else { None },
1153        );
1154
1155        // Restore original pop_size in config if needed
1156        config.population_size = old_pop;
1157
1158        total_gens_used += res.generations_used;
1159        total_evals_used += used_evals;
1160        final_pop = res.final_population;
1161        final_reason = res.termination_reason.clone();
1162
1163        if res.best_solution.0 < global_best {
1164            global_best = res.best_solution.0;
1165            global_params = res.best_solution.1.clone();
1166        }
1167
1168        // Save updated state for next iteration
1169        cmaes_state = Some(state_out);
1170
1171        // If that was a large run, we double large_pop next time we do large
1172        // If it was a small run, we can keep small_pop or slightly adjust
1173        if !use_small {
1174            large_pop = (large_pop as f64 * config.bipop_large_pop_increase_factor.unwrap_or(2.0)).round() as usize;
1175        }
1176
1177        if config.use_subrun_budgeting && total_budget > 0 && total_evals_used >= total_budget {
1178            if config.verbosity >= 1 {
1179                eprintln!(
1180                    "[DEBUG] BIPOP early stop: total_evals_used={} >= total_evals_budget={}",
1181                    total_evals_used, total_budget
1182                );
1183            }
1184            break;
1185        }
1186    }
1187
1188    CmaesResult {
1189        best_solution: (global_best, global_params),
1190        generations_used: total_gens_used,
1191        termination_reason: format!(
1192            "{} (BIPOP restarts used: {})",
1193            final_reason, config.bipop_restarts
1194        ),
1195        final_population: final_pop,
1196    }
1197}