basin 0.1.0

An optimization library for Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
use std::marker::PhantomData;

use crate::core::constraint::BoxConstrained;
use crate::core::executor::run_loop;
use crate::core::math::{
    ComponentMulAssign, MatTransposeVec, MatVec, MatrixIdentity, NormSquared, RankOneUpdate,
    SampleStandardNormal, SampleUniformBox, ScaleInPlace, ScaledAdd, SymmetricEigen, VectorLen,
};
use crate::core::problem::CostFunction;
use crate::core::rng::{ChaCha8Rng, Rng, SeedableRng};
use crate::core::solver::Solver;
use crate::core::state::{BasicPopulationState, PopulationState, State};
use crate::core::termination::{MaxCostEvals, TerminationCriterion, TerminationReason};
use crate::solver::cma_es::CmaEs;
use crate::solver::ssga::{
    bga_mutate_in_place, blx_alpha_crossover, nam_select, replace_worst_if_better,
};

/// Per-individual chain snapshot: a [`CmaEs`] carrying full evolution
/// state and the [`BasicPopulationState`] holding the last generation's
/// candidates that the next `next_iter` needs as a recombination basis.
/// Stored in [`MaLsChState::cma_chains`] as `Option<ChainSlot<V, M>>`
/// (`None` for individuals that have never undergone LS).
type ChainSlot<V, M> = (CmaEs<V, M>, BasicPopulationState<V>);

/// State carried by [`MaLsChCma`]: a steady-state population plus
/// per-individual local-search chain data.
///
/// Solver-private (declared `pub` so the impl can construct it but kept
/// out of `core::state` per AGENTS.md tenet 4 — one consumer, no
/// shared abstraction to design yet). Each chain entry is the saved
/// `(CmaEs, BasicPopulationState)` pair the inner needs for a resumed
/// run: the [`CmaEs`] carries the algorithmic-constants and evolution
/// state (mean, sigma, covariance, paths); the
/// [`BasicPopulationState`] carries the previous generation's λ
/// candidates the next CMA `next_iter` needs as the `y_sorted` basis
/// for its m/σ/C update.
pub struct MaLsChState<V, M> {
    pub(crate) candidates: Vec<V>,
    pub(crate) costs: Vec<f64>,
    pub(crate) cma_chains: Vec<Option<ChainSlot<V, M>>>,
    /// Cost of `candidates[i]` at the end of its last LS application,
    /// or `+∞` if never LS'd. Used to evaluate the S_LS eligibility
    /// rule (Molina 2010 §4.3 step 1: `last_ls_cost − current_cost ≥
    /// δ_LS_min`).
    pub(crate) last_ls_cost: Vec<f64>,
    pub(crate) ls_application_count: Vec<u32>,
    iter: u64,
    cost_evals: u64,
}

impl<V, M> MaLsChState<V, M> {
    /// Number of LS applications that have completed on
    /// `candidates[i]` so far. Exposed for tests that need to verify
    /// the chain machinery is firing (e.g. a single individual being
    /// re-selected and resumed).
    pub fn ls_application_count(&self, i: usize) -> u32 {
        self.ls_application_count[i]
    }
}

impl<V, M> State for MaLsChState<V, M> {
    type Param = V;
    type Float = f64;

    fn iter(&self) -> u64 {
        self.iter
    }
    fn increment_iter(&mut self) {
        self.iter += 1;
    }
    fn cost_evals(&self) -> u64 {
        self.cost_evals
    }
    fn increment_cost_evals(&mut self, by: u64) {
        self.cost_evals += by;
    }
    fn param(&self) -> &V {
        &self.candidates[0]
    }
    fn cost(&self) -> f64 {
        self.costs[0]
    }
}

impl<V, M> PopulationState for MaLsChState<V, M> {
    fn candidates(&self) -> &[V] {
        &self.candidates
    }
    fn costs(&self) -> &[f64] {
        &self.costs
    }
}

impl<V, M> MaLsChState<V, M> {
    /// Build an empty state for `MaLsChCma::init` to fill. Use as the
    /// initial state passed to
    /// [`Executor`](crate::core::executor::Executor::new).
    pub fn new() -> Self {
        Self {
            candidates: Vec::new(),
            costs: Vec::new(),
            cma_chains: Vec::new(),
            last_ls_cost: Vec::new(),
            ls_application_count: Vec::new(),
            iter: 0,
            cost_evals: 0,
        }
    }
}

impl<V, M> Default for MaLsChState<V, M> {
    fn default() -> Self {
        Self::new()
    }
}

/// `MA-LSCh-CMA` — memetic algorithm with local-search chains using
/// CMA-ES as the inner LS operator, per Molina et al. 2010 §4.4.
///
/// A steady-state real-coded GA (SSGA: BLX-α + NAM + BGA + replace-
/// worst) explores globally; CMA-ES exploits locally on individuals
/// that look promising. The novel piece is **chain persistence**: each
/// individual that has undergone LS keeps the *full CMA-ES evolution
/// state* (`m`, `σ`, `C`, `p_σ`, `p_c`, eigendecomposition `B/D`) so
/// that re-selecting it later resumes the same CMA-ES run from where it
/// last stopped, rather than restarting from scratch. CMA-ES adapts a
/// per-basin search distribution; the chain mechanism rewards basins
/// that keep improving by extending their LS time.
///
/// # Algorithm
///
/// One [`next_iter`](Solver::next_iter) does:
///
/// 1. **SSGA phase.** Loop SSGA offspring generation
///    (NAM → BLX-α → BGA → replace-worst) until `nfrec` cost
///    evaluations have been spent (Molina 2010 §4.3 step 2). When
///    replace-worst displaces an individual, its chain (if any) is
///    discarded — the new genome is treated as never-LS'd.
/// 2. **Build `S_LS`** = `{ i : never LS'd OR
///    last_ls_cost[i] − costs[i] ≥ δ_LS_min }` (§4.3 step 3).
/// 3. **Pick `c_LS`.** If `S_LS` non-empty, take the best individual
///    in it; otherwise take the best individual in the whole population
///    (Molina §4.3 final rule, line 371 of `references/molina-2010`).
/// 4. **Resume-or-fresh CMA-ES.**
///    - If `c_LS` has no stored chain: construct a fresh
///      [`CmaEs`] with `m = candidates[c_LS]`,
///      `σ = ½ · min_{j ≠ c_LS} ‖candidates[c_LS] − candidates[j]‖`,
///      and a per-individual seed derived from the outer RNG (§4.4.6).
///    - Otherwise: take the saved `(CmaEs, BasicPopulationState)` out
///      of the chain slot. The CmaEs has full evolution state; the
///      BasicPopulationState has the previous generation's λ candidates
///      the next CMA `next_iter` uses as the recombination basis.
/// 5. **Drive the inner.** Reset the inner state's `cost_evals` to 0,
///    then `run_loop(problem, state, &mut cma,
///    &mut [MaxCostEvals(ls_intensity)], u64::MAX)`. `CmaEs::init` is
///    idempotent (early-returns when its `state` is `Some`), so
///    resumed runs keep their evolution state across calls.
/// 6. **Aggregate, route failures, write back.** Per AGENTS.md
///    "Solver composition" rules:
///    - Roll `inner_result.state.cost_evals()` into outer
///      `cost_evals` (rule 1: eval aggregation).
///    - Bubble `SolverFailed` (rule 3: failure routing); other
///      reasons (`MaxCostEvals`, `SolverConverged`) are clean stops.
///    - If `inner_result.cost() < costs[c_LS]`, write the improved
///      param/cost back. Always update `last_ls_cost[c_LS]` and
///      `ls_application_count[c_LS]`. Store the advanced
///      `(CmaEs, BasicPopulationState)` back in the slot.
/// 7. **Resort** the population (and parallel arrays) ascending.
///
/// # Default parameters
///
/// All defaults follow Molina 2010 §4.4.7 unless noted:
///
/// | Field | Default | Source |
/// |---|---|---|
/// | `pop_size` | `60` | §4.4.7 |
/// | `blx_alpha` | `0.5` | §4.4.7 |
/// | `nam_pool` | `4` (=`n_ass + 1` with `n_ass = 3`) | §4.4.7 |
/// | `mutation_prob` | `0.125` | §4.4.7 |
/// | `bga_range_fraction` | `0.1` | §4.4.4 |
/// | `ls_intensity` (`I_str`) | `300` | Bergmeir 2016 example |
/// | `ls_improvement_threshold` (`δ_LS_min`) | `1e-8` | §4.4.7 |
/// | `nfrec` | `= ls_intensity` | Derived from `r_L/G = 0.5` (§4.3) |
/// | `inner_lambda` | `CmaEs::default_lambda(D)` | Hansen 2016 |
/// | `initial_sigma_fallback` | `1.0` | when min-neighbor distance is 0 |
///
/// # Reproducibility
///
/// Carries a [`ChaCha8Rng`] seeded from the `seed: u64` passed to
/// [`new`](Self::new). Each fresh chain pulls its own per-individual
/// seed from this outer RNG, so the chain trajectories stay
/// deterministic for a fixed outer seed (load-bearing for stochastic-
/// solver reproducibility across platforms including
/// `wasm32-unknown-unknown`).
///
/// # Contract
///
/// - **Caller must:** implement [`CostFunction<Param = V, Output = f64>`]
///   *and* [`BoxConstrained<Param = V>`] on the problem. The SSGA needs
///   the box for initial sampling, BLX clipping, and BGA range; the
///   per-individual CMA-ES inner does not see the box (so the inner
///   CMA-ES is *unbounded* — chain individuals can drift outside the
///   box and be discarded only via the SSGA replace-worst feedback
///   loop). This matches Molina 2010 §4.4.6.
/// - **Caller must:** hand in a [`MaLsChState::new()`].
/// - **Implementor must:** maintain the
///   [`PopulationState`](crate::core::state::PopulationState)
///   sorted-by-cost invariant at the start and end of every iteration.
///
/// # Termination
///
/// No solver-internal optimality test. Pair with framework criteria
/// — typically
/// [`MaxCostEvals`](crate::core::termination::MaxCostEvals) for budget
/// control. Chain segments overshoot `I_str` by up to `λ_inner − 1`
/// evaluations (CMA-ES runs whole generations); the outer `MaxCostEvals`
/// will fire on the next outer iteration boundary, not exactly on the
/// budget. Document and accept; matches Bergmeir's reference behavior.
///
/// # Backends
///
/// Same coverage as [`CmaEs`]: `nalgebra` (`DVector` / `DMatrix`),
/// `faer` (`Col` / `Mat`), and `ndarray::Array1<f64>` with
/// `ndarray-linalg` not required (`SymmetricEigen` for `Array1` is
/// gated by the matrix backend that *isn't* available for ndarray, so
/// in practice nalgebra and faer). `Vec<f64>` produces a compile-time
/// error per tenet 5 (no honest matrix type).
pub struct MaLsChCma<V, M> {
    pop_size: usize,
    blx_alpha: f64,
    nam_pool: usize,
    mutation_prob: f64,
    bga_range_fraction: f64,
    ls_intensity: u64,
    ls_improvement_threshold: f64,
    nfrec: Option<u64>,
    inner_lambda: Option<usize>,
    initial_sigma_fallback: f64,
    seed: u64,
    rng: Option<ChaCha8Rng>,
    _phantom: PhantomData<(V, M)>,
}

impl<V, M> MaLsChCma<V, M> {
    /// Build a new `MaLsChCma` with the Molina 2010 §4.4.7 defaults
    /// and a PRNG seeded from `seed`.
    pub fn new(seed: u64) -> Self {
        Self {
            pop_size: 60,
            blx_alpha: 0.5,
            nam_pool: 4,
            mutation_prob: 0.125,
            bga_range_fraction: 0.1,
            ls_intensity: 300,
            ls_improvement_threshold: 1e-8,
            nfrec: None,
            inner_lambda: None,
            initial_sigma_fallback: 1.0,
            seed,
            rng: None,
            _phantom: PhantomData,
        }
    }

    /// Override the SSGA population size (default `60`).
    ///
    /// # Panics
    ///
    /// Panics if `pop_size < nam_pool`. NAM needs at least `nam_pool`
    /// individuals to sample from.
    pub fn with_pop_size(mut self, pop_size: usize) -> Self {
        assert!(
            pop_size >= self.nam_pool,
            "MaLsChCma requires pop_size >= nam_pool (got pop_size={}, nam_pool={})",
            pop_size,
            self.nam_pool
        );
        self.pop_size = pop_size;
        self
    }

    /// Override the BLX-α parameter (default `0.5`).
    ///
    /// # Panics
    ///
    /// Panics if `alpha < 0`.
    pub fn with_blx_alpha(mut self, alpha: f64) -> Self {
        assert!(alpha >= 0.0, "blx_alpha must be >= 0, got {}", alpha);
        self.blx_alpha = alpha;
        self
    }

    /// Override the NAM pool size (default `4`).
    ///
    /// # Panics
    ///
    /// Panics if `pool < 2`.
    pub fn with_nam_pool(mut self, pool: usize) -> Self {
        assert!(pool >= 2, "nam_pool must be >= 2, got {}", pool);
        self.nam_pool = pool;
        self
    }

    /// Override the per-gene BGA mutation probability (default `0.125`).
    ///
    /// # Panics
    ///
    /// Panics if `p` is not in `[0, 1]`.
    pub fn with_mutation_prob(mut self, p: f64) -> Self {
        assert!(
            (0.0..=1.0).contains(&p),
            "mutation_prob must be in [0, 1], got {}",
            p
        );
        self.mutation_prob = p;
        self
    }

    /// Override the BGA range fraction (default `0.1`).
    ///
    /// # Panics
    ///
    /// Panics if `f <= 0`.
    pub fn with_bga_range_fraction(mut self, f: f64) -> Self {
        assert!(f > 0.0, "bga_range_fraction must be > 0, got {}", f);
        self.bga_range_fraction = f;
        self
    }

    /// Override `I_str`, the per-chain LS intensity in cost-evaluation
    /// units (default `300`, Bergmeir 2016 example value). Each chain
    /// segment runs CMA-ES for whole generations until `cost_evals ≥
    /// I_str`, slightly overshooting `I_str` by up to `λ_inner − 1`.
    ///
    /// # Panics
    ///
    /// Panics if `istr == 0`.
    pub fn with_ls_intensity(mut self, istr: u64) -> Self {
        assert!(istr >= 1, "ls_intensity must be >= 1, got {}", istr);
        self.ls_intensity = istr;
        self
    }

    /// Override `δ_LS_min`, the cost-improvement threshold an
    /// already-LS'd individual must clear to be re-eligible for LS
    /// (default `1e-8`, Molina 2010 §4.4.7).
    ///
    /// # Panics
    ///
    /// Panics if `delta < 0`.
    pub fn with_ls_improvement_threshold(mut self, delta: f64) -> Self {
        assert!(
            delta >= 0.0,
            "ls_improvement_threshold must be >= 0, got {}",
            delta
        );
        self.ls_improvement_threshold = delta;
        self
    }

    /// Override `n_frec`, the number of SSGA cost evaluations performed
    /// between LS applications (default is to match `ls_intensity`,
    /// which gives the 50/50 effort split Molina 2010 §4.3 recommends
    /// from `r_L/G = 0.5`).
    ///
    /// # Panics
    ///
    /// Panics if `n == 0`.
    pub fn with_nfrec(mut self, n: u64) -> Self {
        assert!(n >= 1, "nfrec must be >= 1, got {}", n);
        self.nfrec = Some(n);
        self
    }

    /// Override the inner CMA-ES population size `λ_inner` (default is
    /// [`CmaEs::default_lambda(D)`](CmaEs::default_lambda) computed at
    /// init time from the problem's dimension).
    ///
    /// # Panics
    ///
    /// Panics if `lambda < 4` (Hansen 2016's lower bound on CMA-ES λ).
    pub fn with_inner_lambda(mut self, lambda: usize) -> Self {
        assert!(lambda >= 4, "inner_lambda must be >= 4, got {}", lambda);
        self.inner_lambda = Some(lambda);
        self
    }

    /// Override the σ value used when constructing a fresh CMA-ES
    /// chain for an individual whose nearest-neighbor distance is `0`
    /// (degenerate identical-population case). Default `1.0`.
    ///
    /// # Panics
    ///
    /// Panics if `sigma <= 0`.
    pub fn with_initial_sigma_fallback(mut self, sigma: f64) -> Self {
        assert!(
            sigma > 0.0,
            "initial_sigma_fallback must be > 0, got {}",
            sigma
        );
        self.initial_sigma_fallback = sigma;
        self
    }
}

/// Compute `0.5 · min_{j ≠ i} ‖candidates[i] − candidates[j]‖₂`, the
/// per-individual σ-init formula from Molina 2010 §4.4.6. Returns
/// `None` if there's no other individual (singleton population).
fn sigma_init_for<V>(candidates: &[V], i: usize) -> Option<f64>
where
    V: Clone + ScaledAdd<f64> + NormSquared,
{
    if candidates.len() < 2 {
        return None;
    }
    let mut best_sq = f64::INFINITY;
    for (j, x) in candidates.iter().enumerate() {
        if j == i {
            continue;
        }
        let mut diff = candidates[i].clone();
        diff.scaled_add(-1.0, x);
        let d_sq = diff.norm_squared();
        if d_sq < best_sq {
            best_sq = d_sq;
        }
    }
    Some(0.5 * best_sq.sqrt())
}

impl<P, V, M> Solver<P, MaLsChState<V, M>> for MaLsChCma<V, M>
where
    P: CostFunction<Param = V, Output = f64> + BoxConstrained<Param = V>,
    V: VectorLen
        + Clone
        + SampleUniformBox
        + SampleStandardNormal
        + ScaledAdd<f64>
        + ScaleInPlace
        + ComponentMulAssign
        + NormSquared
        + std::ops::Index<usize, Output = f64>
        + std::ops::IndexMut<usize, Output = f64>,
    M: MatrixIdentity
        + MatVec<V>
        + MatTransposeVec<V>
        + ScaleInPlace
        + RankOneUpdate<V>
        + SymmetricEigen<V>
        + Clone,
{
    fn init(&mut self, problem: &P, mut state: MaLsChState<V, M>) -> MaLsChState<V, M> {
        let lo = problem.lower();
        let hi = problem.upper();
        let mut rng = ChaCha8Rng::seed_from_u64(self.seed);

        // Sample the initial population uniformly in the box.
        state.candidates.clear();
        state.costs.clear();
        state.cma_chains.clear();
        state.last_ls_cost.clear();
        state.ls_application_count.clear();
        for _ in 0..self.pop_size {
            let x = V::sample_uniform_box(lo, hi, &mut rng);
            let c = problem.cost(&x);
            state.candidates.push(x);
            state.costs.push(c);
            state.cma_chains.push(None);
            state.last_ls_cost.push(f64::INFINITY);
            state.ls_application_count.push(0);
        }
        state.cost_evals += self.pop_size as u64;
        sort_parallel_arrays(&mut state);

        self.rng = Some(rng);
        state
    }

    fn next_iter(
        &mut self,
        problem: &P,
        mut state: MaLsChState<V, M>,
    ) -> (MaLsChState<V, M>, Option<TerminationReason>) {
        let rng = self
            .rng
            .as_mut()
            .expect("MaLsChCma::init must run before next_iter");
        let lo = problem.lower();
        let hi = problem.upper();
        let nfrec = self.nfrec.unwrap_or(self.ls_intensity);

        // -- Phase 1: SSGA for nfrec evaluations. --
        let evals_at_phase_start = state.cost_evals;
        while state.cost_evals - evals_at_phase_start < nfrec {
            let (p1, p2) = nam_select(&state.candidates, self.nam_pool, rng);
            let mut child = blx_alpha_crossover(
                &state.candidates[p1],
                &state.candidates[p2],
                self.blx_alpha,
                lo,
                hi,
                rng,
            );
            bga_mutate_in_place(
                &mut child,
                lo,
                hi,
                self.mutation_prob,
                self.bga_range_fraction,
                rng,
            );
            let c_child = problem.cost(&child);
            state.cost_evals += 1;
            if let Some(replaced_idx) =
                replace_worst_if_better(&mut state.candidates, &mut state.costs, child, c_child)
            {
                // The displaced individual's chain (if any) is orphaned:
                // the new genome is a fresh point that should start its
                // own chain on first LS pick.
                state.cma_chains[replaced_idx] = None;
                state.last_ls_cost[replaced_idx] = f64::INFINITY;
                state.ls_application_count[replaced_idx] = 0;
            }
        }
        sort_parallel_arrays(&mut state);

        // -- Phase 2: pick the LS target c_LS. --
        let mut c_ls: Option<usize> = None;
        let mut best_cost_in_s_ls = f64::INFINITY;
        for i in 0..state.candidates.len() {
            let eligible = state.cma_chains[i].is_none()
                || (state.last_ls_cost[i] - state.costs[i] >= self.ls_improvement_threshold);
            if eligible && state.costs[i] < best_cost_in_s_ls {
                best_cost_in_s_ls = state.costs[i];
                c_ls = Some(i);
            }
        }
        // Molina §4.3: when |S_LS| = 0, apply LS to the best individual
        // unconditionally.
        let c_ls = c_ls.unwrap_or(0);

        // -- Phase 3: resume or construct the inner CMA-ES. --
        let (mut cma, inner_state) = match state.cma_chains[c_ls].take() {
            Some((cma, inner_state)) => {
                let mut s = inner_state;
                // Local budget: MaxCostEvals(ls_intensity) is checked
                // against `state.cost_evals()`, which is reset here so
                // each chain segment gets a fresh I_str budget.
                s.cost_evals = 0;
                s.iter = 0;
                (cma, s)
            }
            None => {
                let n = state.candidates[c_ls].vec_len();
                let sigma_init = sigma_init_for(&state.candidates, c_ls)
                    .filter(|s| *s > 0.0)
                    .unwrap_or(self.initial_sigma_fallback);
                let derived_seed = rng.random::<u64>();
                let mut cma =
                    CmaEs::<V, M>::new(state.candidates[c_ls].clone(), sigma_init, derived_seed);
                if let Some(lam) = self.inner_lambda {
                    cma = cma.with_lambda(lam);
                }
                // Empty inner state — CmaEs::init will populate the
                // first generation since `cma`'s working state is None.
                let lambda = self
                    .inner_lambda
                    .unwrap_or(CmaEs::<V, M>::default_lambda(n));
                let inner_state = BasicPopulationState::<V>::with_size(lambda);
                (cma, inner_state)
            }
        };

        // -- Phase 4: drive the inner. --
        // Build per-call criteria so `MaxCostEvals` doesn't leak state
        // between chain segments (it's stateless, but the
        // `InnerExecutor` reuse pattern doesn't fit here since we hold
        // a different `cma` per individual). Allocation cost is one
        // box per chain segment — negligible against I_str evals.
        let mut criteria: Vec<Box<dyn TerminationCriterion<BasicPopulationState<V>>>> =
            vec![Box::new(MaxCostEvals(self.ls_intensity))];
        let inner_result = run_loop(problem, inner_state, &mut cma, &mut criteria, u64::MAX);

        // -- Phase 5: aggregate, route failures, write back. --
        // Rule 1: eval aggregation.
        state.cost_evals += inner_result.state.cost_evals();
        // Rule 3: failure routing. `SolverFailed` is the only failure
        // reason; other reasons (MaxCostEvals from our budget,
        // SolverConverged from CMA's TolX) are clean stops the outer
        // consumes.
        if inner_result.reason.is_failure() {
            // Leave the chain dropped so a future pick would restart.
            return (state, Some(inner_result.reason));
        }

        let new_cost = inner_result.cost();
        let new_param = inner_result.param().clone();
        // Conditional write-back: only adopt the LS result if it
        // improves on the current cost. Strict Molina §4.3 step 10 is
        // unconditional, but a conditional update is safer (CMA-ES is
        // genuinely non-monotone over a chain segment) and matches the
        // Rmalschains R package's behavior.
        if new_cost < state.costs[c_ls] {
            state.candidates[c_ls] = new_param;
            state.costs[c_ls] = new_cost;
        }
        state.last_ls_cost[c_ls] = state.costs[c_ls];
        state.ls_application_count[c_ls] = state.ls_application_count[c_ls].saturating_add(1);
        state.cma_chains[c_ls] = Some((cma, inner_result.state));

        // -- Phase 6: resort all parallel arrays jointly. --
        sort_parallel_arrays(&mut state);

        (state, None)
    }
}

/// Joint ascending-by-cost sort over the five parallel arrays in
/// [`MaLsChState`]. The chain pointer travels with its individual
/// through the permutation — that's why the chain belongs in the
/// state, not in a side index.
fn sort_parallel_arrays<V, M>(state: &mut MaLsChState<V, M>) {
    let n = state.candidates.len();
    debug_assert_eq!(n, state.costs.len());
    debug_assert_eq!(n, state.cma_chains.len());
    debug_assert_eq!(n, state.last_ls_cost.len());
    debug_assert_eq!(n, state.ls_application_count.len());

    let mut idx: Vec<usize> = (0..n).collect();
    idx.sort_by(|&i, &j| {
        state.costs[i]
            .partial_cmp(&state.costs[j])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    apply_permutation::<V>(&mut state.candidates, &idx);
    apply_permutation::<f64>(&mut state.costs, &idx);
    apply_permutation::<Option<ChainSlot<V, M>>>(&mut state.cma_chains, &idx);
    apply_permutation::<f64>(&mut state.last_ls_cost, &idx);
    apply_permutation::<u32>(&mut state.ls_application_count, &idx);
}

/// Cycle-following in-place permutation: after the call,
/// `slice[i] = original[idx[i]]`. Mirrors the helper used by
/// `cma_es::sort_population_ascending`; inlined here because that
/// helper is module-private.
fn apply_permutation<T>(slice: &mut [T], idx: &[usize]) {
    let mut visited = vec![false; slice.len()];
    for start in 0..slice.len() {
        if visited[start] || idx[start] == start {
            visited[start] = true;
            continue;
        }
        let mut current = start;
        loop {
            let next = idx[current];
            visited[current] = true;
            if next == start {
                break;
            }
            slice.swap(current, next);
            current = next;
        }
    }
}