heuropt 0.11.0 - Docs.rs

//! `SeparableNes` — Wierstra et al. 2008/2014 Natural Evolution Strategy
//! with diagonal covariance (sNES).

use rand_distr::{Distribution, Normal};

use crate::core::candidate::Candidate;
use crate::core::evaluation::Evaluation;
use crate::core::objective::Direction;
use crate::core::population::Population;
use crate::core::problem::Problem;
use crate::core::result::OptimizationResult;
use crate::core::rng::rng_from_seed;
use crate::operators::real::RealBounds;
use crate::traits::Optimizer;

/// Configuration for [`SeparableNes`].
#[derive(Debug, Clone)]
pub struct SeparableNesConfig {
    /// Population size `λ` per generation. NES recommends `4 + ⌊3·ln(n)⌋`.
    pub population_size: usize,
    /// Number of generations.
    pub generations: usize,
    /// Initial step size `σ_0`.
    pub initial_sigma: f64,
    /// Mean learning rate `η_μ`. NES default is 1.0.
    pub mean_learning_rate: f64,
    /// Sigma learning rate `η_σ`. NES default is `(3 + ln(n)) / (5·sqrt(n))`,
    /// computed at runtime if you set this to `None`.
    pub sigma_learning_rate: Option<f64>,
    /// Seed for the deterministic RNG.
    pub seed: u64,
}

impl Default for SeparableNesConfig {
    fn default() -> Self {
        Self {
            population_size: 16,
            generations: 200,
            initial_sigma: 0.5,
            mean_learning_rate: 1.0,
            sigma_learning_rate: None,
            seed: 42,
        }
    }
}

/// Separable Natural Evolution Strategy (sNES).
///
/// `Vec<f64>` decisions only. Single-objective only. Maintains a sampling
/// distribution `N(μ, diag(σ²))` and updates `μ`, `σ` each generation by
/// following the natural gradient of expected fitness, with rank-shaped
/// fitness utilities for invariance to monotone transforms of the
/// objective.
///
/// # Example
///
/// ```
/// use heuropt::prelude::*;
///
/// struct Sphere;
/// impl Problem for Sphere {
///     type Decision = Vec<f64>;
///     fn objectives(&self) -> ObjectiveSpace {
///         ObjectiveSpace::new(vec![Objective::minimize("f")])
///     }
///     fn evaluate(&self, x: &Vec<f64>) -> Evaluation {
///         Evaluation::new(vec![x.iter().map(|v| v * v).sum::<f64>()])
///     }
/// }
///
/// let mut opt = SeparableNes::new(
///     SeparableNesConfig {
///         population_size: 16,
///         generations: 80,
///         initial_sigma: 1.0,
///         mean_learning_rate: 1.0,
///         sigma_learning_rate: None, // use NES default
///         seed: 42,
///     },
///     RealBounds::new(vec![(-5.0, 5.0); 3]),
/// );
/// let r = opt.run(&Sphere);
/// assert!(r.best.unwrap().evaluation.objectives[0] < 1e-3);
/// ```
#[derive(Debug, Clone)]
pub struct SeparableNes {
    /// Algorithm configuration.
    pub config: SeparableNesConfig,
    /// Per-variable bounds — used to seed `μ` (midpoint) and clamp every
    /// sampled offspring.
    pub bounds: RealBounds,
}

impl SeparableNes {
    /// Construct a `SeparableNes`.
    pub fn new(config: SeparableNesConfig, bounds: RealBounds) -> Self {
        Self { config, bounds }
    }
}

impl<P> Optimizer<P> for SeparableNes
where
    P: Problem<Decision = Vec<f64>> + Sync,
{
    fn run(&mut self, problem: &P) -> OptimizationResult<P::Decision> {
        assert!(
            self.config.population_size >= 2,
            "SeparableNes population_size must be >= 2",
        );
        assert!(
            self.config.initial_sigma > 0.0,
            "SeparableNes initial_sigma must be > 0"
        );
        let objectives = problem.objectives();
        assert!(
            objectives.is_single_objective(),
            "SeparableNes requires exactly one objective",
        );
        let direction = objectives.objectives[0].direction;
        let n = self.bounds.bounds.len();
        let lambda = self.config.population_size;
        let mut rng = rng_from_seed(self.config.seed);

        // Initial state.
        let mut mean: Vec<f64> = self
            .bounds
            .bounds
            .iter()
            .map(|&(lo, hi)| 0.5 * (lo + hi))
            .collect();
        let mut sigma = vec![self.config.initial_sigma; n];

        // Default sigma learning rate (Wierstra et al. 2014, Eq. 11).
        let eta_sigma = self
            .config
            .sigma_learning_rate
            .unwrap_or_else(|| (3.0 + (n as f64).ln()) / (5.0 * (n as f64).sqrt()));
        let eta_mean = self.config.mean_learning_rate;

        // Rank utilities — the standard NES weighting:
        //   u_i = max(0, ln(λ/2 + 1) - ln(i)) / Σ - 1/λ
        // (positive total mass, zero sum after the shift).
        let utilities = nes_utilities(lambda);

        let mut best_seen: Option<Candidate<Vec<f64>>> = None;
        let mut total_evaluations = 0usize;

        for _ in 0..self.config.generations {
            // Sample λ offspring.
            let mut z_samples: Vec<Vec<f64>> = Vec::with_capacity(lambda);
            let mut x_samples: Vec<Vec<f64>> = Vec::with_capacity(lambda);
            let mut evals: Vec<Evaluation> = Vec::with_capacity(lambda);
            for _ in 0..lambda {
                let z: Vec<f64> = (0..n)
                    .map(|_| Normal::new(0.0, 1.0).unwrap().sample(&mut rng))
                    .collect();
                let x: Vec<f64> = (0..n)
                    .map(|j| {
                        let v = mean[j] + sigma[j] * z[j];
                        let (lo, hi) = self.bounds.bounds[j];
                        v.clamp(lo, hi)
                    })
                    .collect();
                let e = problem.evaluate(&x);
                total_evaluations += 1;
                let beats_best = match &best_seen {
                    None => true,
                    Some(b) => better(&e, &b.evaluation, direction),
                };
                if beats_best {
                    best_seen = Some(Candidate::new(x.clone(), e.clone()));
                }
                z_samples.push(z);
                x_samples.push(x);
                evals.push(e);
            }

            // Sort offspring best → worst (so utility[0] goes to the best).
            let mut order: Vec<usize> = (0..lambda).collect();
            order.sort_by(|&a, &b| compare(&evals[a], &evals[b], direction));

            // Update mean: μ ← μ + η_μ · σ · Σ u_i · z_i
            let mut grad_mean = vec![0.0_f64; n];
            for k in 0..lambda {
                let u = utilities[k];
                let z = &z_samples[order[k]];
                for j in 0..n {
                    grad_mean[j] += u * z[j];
                }
            }
            for j in 0..n {
                mean[j] += eta_mean * sigma[j] * grad_mean[j];
                let (lo, hi) = self.bounds.bounds[j];
                mean[j] = mean[j].clamp(lo, hi);
            }

            // Update sigma: σ_j ← σ_j · exp((η_σ/2) · Σ u_i · (z_i,j² - 1))
            for j in 0..n {
                let mut grad_sigma_j = 0.0;
                for k in 0..lambda {
                    let u = utilities[k];
                    let z = &z_samples[order[k]];
                    grad_sigma_j += u * (z[j] * z[j] - 1.0);
                }
                sigma[j] *= (0.5 * eta_sigma * grad_sigma_j).exp();
                if !sigma[j].is_finite() || sigma[j] < 1e-30 {
                    sigma[j] = 1e-30;
                }
            }
        }

        let best = best_seen.expect("at least one generation evaluated");
        let population = Population::new(vec![best.clone()]);
        let front = vec![best.clone()];
        OptimizationResult::new(
            population,
            front,
            Some(best),
            total_evaluations,
            self.config.generations,
        )
    }
}

#[cfg(feature = "async")]
impl SeparableNes {
    /// Async version of [`Optimizer::run`] — drives evaluations through
    /// the user-chosen async runtime. Available only with the `async`
    /// feature.
    ///
    /// `concurrency` bounds in-flight evaluations per generation.
    pub async fn run_async<P>(
        &mut self,
        problem: &P,
        concurrency: usize,
    ) -> OptimizationResult<Vec<f64>>
    where
        P: crate::core::async_problem::AsyncProblem<Decision = Vec<f64>>,
    {
        use crate::algorithms::parallel_eval_async::evaluate_batch_async;

        assert!(
            self.config.population_size >= 2,
            "SeparableNes population_size must be >= 2",
        );
        assert!(
            self.config.initial_sigma > 0.0,
            "SeparableNes initial_sigma must be > 0"
        );
        let objectives = problem.objectives();
        assert!(
            objectives.is_single_objective(),
            "SeparableNes requires exactly one objective",
        );
        let direction = objectives.objectives[0].direction;
        let n = self.bounds.bounds.len();
        let lambda = self.config.population_size;
        let mut rng = rng_from_seed(self.config.seed);

        let mut mean: Vec<f64> = self
            .bounds
            .bounds
            .iter()
            .map(|&(lo, hi)| 0.5 * (lo + hi))
            .collect();
        let mut sigma = vec![self.config.initial_sigma; n];

        let eta_sigma = self
            .config
            .sigma_learning_rate
            .unwrap_or_else(|| (3.0 + (n as f64).ln()) / (5.0 * (n as f64).sqrt()));
        let eta_mean = self.config.mean_learning_rate;

        let utilities = nes_utilities(lambda);

        let mut best_seen: Option<Candidate<Vec<f64>>> = None;
        let mut total_evaluations = 0usize;

        for _ in 0..self.config.generations {
            // Sample λ offspring; matches the sync RNG draw order so seeded
            // runs reproduce exactly.
            let mut z_samples: Vec<Vec<f64>> = Vec::with_capacity(lambda);
            let mut x_samples: Vec<Vec<f64>> = Vec::with_capacity(lambda);
            for _ in 0..lambda {
                let z: Vec<f64> = (0..n)
                    .map(|_| Normal::new(0.0, 1.0).unwrap().sample(&mut rng))
                    .collect();
                let x: Vec<f64> = (0..n)
                    .map(|j| {
                        let v = mean[j] + sigma[j] * z[j];
                        let (lo, hi) = self.bounds.bounds[j];
                        v.clamp(lo, hi)
                    })
                    .collect();
                z_samples.push(z);
                x_samples.push(x);
            }

            let cands = evaluate_batch_async(problem, x_samples.clone(), concurrency).await;
            total_evaluations += cands.len();
            let evals: Vec<Evaluation> = cands.iter().map(|c| c.evaluation.clone()).collect();
            for c in &cands {
                let beats_best = match &best_seen {
                    None => true,
                    Some(b) => better(&c.evaluation, &b.evaluation, direction),
                };
                if beats_best {
                    best_seen = Some(c.clone());
                }
            }

            let mut order: Vec<usize> = (0..lambda).collect();
            order.sort_by(|&a, &b| compare(&evals[a], &evals[b], direction));

            let mut grad_mean = vec![0.0_f64; n];
            for k in 0..lambda {
                let u = utilities[k];
                let z = &z_samples[order[k]];
                for j in 0..n {
                    grad_mean[j] += u * z[j];
                }
            }
            for j in 0..n {
                mean[j] += eta_mean * sigma[j] * grad_mean[j];
                let (lo, hi) = self.bounds.bounds[j];
                mean[j] = mean[j].clamp(lo, hi);
            }

            for j in 0..n {
                let mut grad_sigma_j = 0.0;
                for k in 0..lambda {
                    let u = utilities[k];
                    let z = &z_samples[order[k]];
                    grad_sigma_j += u * (z[j] * z[j] - 1.0);
                }
                sigma[j] *= (0.5 * eta_sigma * grad_sigma_j).exp();
                if !sigma[j].is_finite() || sigma[j] < 1e-30 {
                    sigma[j] = 1e-30;
                }
            }
        }

        let best = best_seen.expect("at least one generation evaluated");
        let population = Population::new(vec![best.clone()]);
        let front = vec![best.clone()];
        OptimizationResult::new(
            population,
            front,
            Some(best),
            total_evaluations,
            self.config.generations,
        )
    }
}

fn nes_utilities(lambda: usize) -> Vec<f64> {
    let half = lambda as f64 / 2.0 + 1.0;
    let raw: Vec<f64> = (0..lambda)
        .map(|i| {
            let v = half.ln() - ((i + 1) as f64).ln();
            v.max(0.0)
        })
        .collect();
    let sum: f64 = raw.iter().sum::<f64>().max(1e-12);
    let inv_lambda = 1.0 / lambda as f64;
    raw.iter().map(|u| u / sum - inv_lambda).collect()
}

fn compare(a: &Evaluation, b: &Evaluation, direction: Direction) -> std::cmp::Ordering {
    match (a.is_feasible(), b.is_feasible()) {
        (true, false) => std::cmp::Ordering::Less,
        (false, true) => std::cmp::Ordering::Greater,
        (false, false) => a
            .constraint_violation
            .partial_cmp(&b.constraint_violation)
            .unwrap_or(std::cmp::Ordering::Equal),
        (true, true) => match direction {
            Direction::Minimize => a.objectives[0]
                .partial_cmp(&b.objectives[0])
                .unwrap_or(std::cmp::Ordering::Equal),
            Direction::Maximize => b.objectives[0]
                .partial_cmp(&a.objectives[0])
                .unwrap_or(std::cmp::Ordering::Equal),
        },
    }
}

fn better(a: &Evaluation, b: &Evaluation, direction: Direction) -> bool {
    compare(a, b, direction) == std::cmp::Ordering::Less
}

impl crate::traits::AlgorithmInfo for SeparableNes {
    fn name(&self) -> &'static str {
        "sNES"
    }
    fn full_name(&self) -> &'static str {
        "Separable Natural Evolution Strategy"
    }
    fn seed(&self) -> Option<u64> {
        Some(self.config.seed)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests_support::{SchafferN1, Sphere1D};

    fn make_optimizer(seed: u64) -> SeparableNes {
        SeparableNes::new(
            SeparableNesConfig {
                population_size: 16,
                generations: 200,
                initial_sigma: 0.5,
                mean_learning_rate: 1.0,
                sigma_learning_rate: None,
                seed,
            },
            RealBounds::new(vec![(-5.0, 5.0)]),
        )
    }

    #[test]
    fn finds_minimum_of_sphere() {
        let mut opt = make_optimizer(1);
        let r = opt.run(&Sphere1D);
        let best = r.best.unwrap();
        assert!(
            best.evaluation.objectives[0] < 1e-6,
            "got f = {}",
            best.evaluation.objectives[0],
        );
    }

    #[test]
    fn deterministic_with_same_seed() {
        let mut a = make_optimizer(99);
        let mut b = make_optimizer(99);
        let ra = a.run(&Sphere1D);
        let rb = b.run(&Sphere1D);
        assert_eq!(
            ra.best.unwrap().evaluation.objectives,
            rb.best.unwrap().evaluation.objectives,
        );
    }

    #[test]
    fn utilities_sum_to_zero() {
        let u = nes_utilities(8);
        let s: f64 = u.iter().sum();
        assert!(s.abs() < 1e-12, "utilities sum to {s}, not 0");
    }

    #[test]
    #[should_panic(expected = "exactly one objective")]
    fn multi_objective_panics() {
        let mut opt = make_optimizer(0);
        let _ = opt.run(&SchafferN1);
    }

    // ---- Mutation-test pinned helpers --------------------------------------

    use crate::core::evaluation::Evaluation;
    use crate::core::objective::Direction;

    #[test]
    fn nes_utilities_sum_to_zero_and_are_descending() {
        // The NES utility weights are a shifted log-rank scheme; they sum
        // to (approximately) zero and the first (best-ranked) is largest.
        let u = nes_utilities(10);
        assert_eq!(u.len(), 10);
        let sum: f64 = u.iter().sum();
        assert!(sum.abs() < 1e-9, "utilities sum = {sum}");
        // Descending: best rank gets the most weight.
        for w in u.windows(2) {
            assert!(w[0] >= w[1] - 1e-12, "not descending: {:?}", u);
        }
        // The first utility is positive (it gets above-average weight).
        assert!(u[0] > 0.0);
    }

    #[test]
    fn compare_feasibility_first_and_direction() {
        let feasible = Evaluation::new(vec![100.0]);
        let infeasible = Evaluation::constrained(vec![0.0], 1.0);
        assert_eq!(
            compare(&feasible, &infeasible, Direction::Minimize),
            std::cmp::Ordering::Less
        );
        let lo = Evaluation::new(vec![1.0]);
        let hi = Evaluation::new(vec![2.0]);
        assert_eq!(
            compare(&lo, &hi, Direction::Minimize),
            std::cmp::Ordering::Less
        );
        assert_eq!(
            compare(&lo, &hi, Direction::Maximize),
            std::cmp::Ordering::Greater
        );
    }

    #[test]
    fn better_is_strict_less() {
        let lo = Evaluation::new(vec![1.0]);
        let hi = Evaluation::new(vec![2.0]);
        assert!(better(&lo, &hi, Direction::Minimize));
        assert!(!better(&hi, &lo, Direction::Minimize));
        let eq = Evaluation::new(vec![1.0]);
        assert!(!better(&lo, &eq, Direction::Minimize));
    }
}