// basin 0.1.0 - Docs.rs

//! Solver state shapes.
//!
//! Every [`Solver`](crate::core::solver::Solver) carries its iterate as a
//! [`State`]. The base [`State`] trait is the minimum the executor and
//! generic termination criteria need to read; richer state shapes extend
//! it ([`GradientState`] for first-order solvers, [`SimplexState`] for
//! simplex-based solvers like Nelder-Mead) so termination criteria can
//! bound on the minimum capability they need (tenet 3 in `AGENTS.md`).
//!
//! `State::Float` is technically generic but every concrete state ships
//! with `Float = f64`, and every shipped termination criterion that reads
//! costs assumes `f64`. See the *Provisional choices* section of
//! `AGENTS.md` — switching to `F: num_traits::Float` is planned but
//! deferred until the first stochastic solver lands.

/// Limited-memory BFGS / L-BFGS-B state (`LbfgsState`).
pub mod lbfgs;

pub use lbfgs::LbfgsState;

/// Minimum information the executor and generic termination criteria
/// need to read from a solver's iterate.
///
/// # Contract
///
/// - **Caller must:** construct via the appropriate concrete state
///   constructor (e.g. [`BasicState::new`]) before handing the state to
///   [`Executor`](crate::core::executor::Executor). The executor's `init`
///   call populates derived fields (cost, gradient) before any termination
///   check sees the state.
/// - **Implementor must:** keep [`param`](Self::param) stable between
///   iterations — the returned reference is valid until the next
///   [`Solver::next_iter`](crate::core::solver::Solver::next_iter)
///   returns.
/// - **Implementor must:** make [`cost_evals`](Self::cost_evals) count
///   every call to the problem's cost function, not iterations: a single
///   [`Solver::next_iter`](crate::core::solver::Solver::next_iter) may
///   evaluate the cost many times (line searches, Nelder-Mead shrinks),
///   and users budget against this counter rather than
///   [`iter`](Self::iter).
pub trait State {
    /// The parameter type the solver iterates over (e.g. `Vec<f64>`,
    /// `nalgebra::DVector<f64>`).
    type Param;
    /// The scalar type of the objective. In practice always `f64` (see
    /// the module docs).
    type Float;

    /// Number of fully completed iterations. A
    /// [`Solver::next_iter`](crate::core::solver::Solver::next_iter)
    /// that bails mid-iteration with `Some(reason)` does not increment
    /// this counter — see the
    /// [`executor`](crate::core::executor) module for the exact ordering.
    fn iter(&self) -> u64;
    /// Increment [`iter`](Self::iter) by one. Called by the executor
    /// after a successful [`Solver::next_iter`](crate::core::solver::Solver::next_iter).
    fn increment_iter(&mut self);
    /// Cumulative count of cost-function evaluations performed so far.
    /// Diverges from `iter()` whenever a single iteration evaluates the
    /// cost more than once (line searches, Nelder-Mead shrinks, etc.) —
    /// this is what users actually budget against.
    fn cost_evals(&self) -> u64;
    /// Increase the cost-eval counter by `by`. Solvers call this whenever
    /// they invoke the problem's cost function.
    fn increment_cost_evals(&mut self, by: u64);
    /// Current iterate. Stable between
    /// [`Solver::next_iter`](crate::core::solver::Solver::next_iter)
    /// calls; safe to read at any iteration including iter 0.
    fn param(&self) -> &Self::Param;
    /// Cost at the current [`param`](Self::param).
    ///
    /// # Panics
    ///
    /// Behaviour before
    /// [`Solver::init`](crate::core::solver::Solver::init) has run
    /// depends on the concrete state:
    ///
    /// - [`BasicState`] (and `QuasiNewtonState` under the `nalgebra`
    ///   feature) cache the cost in an `Option` and panic if `cost()` is
    ///   read while it is still `None`.
    /// - [`BasicPopulationState`] built with `with_size` starts with an
    ///   empty population and panics until the solver has filled it.
    /// - [`BasicSimplexState`], and [`BasicPopulationState`] built with
    ///   `from_population`, do **not** panic: they return the
    ///   `f64::INFINITY` placeholder stored at construction.
    ///
    /// By contract the executor calls `init` before any
    /// termination criterion check, so reads from criteria and from
    /// [`OptimizationResult`](crate::core::executor::OptimizationResult)
    /// are safe.
    fn cost(&self) -> Self::Float;
}

/// States that carry a gradient at the current [`param`](State::param).
///
/// # Contract
///
/// - **Implementor must:** at the end of every successful
///   [`Solver::next_iter`](crate::core::solver::Solver::next_iter)
///   (and at the end of [`Solver::init`](crate::core::solver::Solver::init)
///   for first-order solvers), populate
///   [`gradient`](Self::gradient) so it corresponds to the *current*
///   [`param`](State::param). Termination criteria read it; if it lags
///   behind the param they will fire on stale data.
/// - `None` means "no gradient available at this iterate yet". The
///   only legitimate case is before
///   [`Solver::init`](crate::core::solver::Solver::init) has run.
///   Criteria like [`GradientTolerance`](crate::core::termination::GradientTolerance)
///   treat `None` as "silently skip the check".
pub trait GradientState: State {
    /// Gradient at the current [`param`](State::param), if populated.
    /// Returns `None` when no gradient has been stored yet (see the
    /// trait-level contract).
    fn gradient(&self) -> Option<&Self::Param>;
    /// Cumulative count of gradient evaluations performed so far. Lives
    /// on `GradientState` rather than `State` so derivative-free states
    /// don't carry a counter they can never increment.
    fn gradient_evals(&self) -> u64;
    /// Increase the gradient-eval counter by `by`. Solvers call this
    /// whenever they invoke the problem's gradient function.
    fn increment_gradient_evals(&mut self, by: u64);
}

/// States built around a simplex of `n + 1` vertices and parallel costs.
///
/// Mirrors [`GradientState`]: the trait exists so termination criteria
/// (e.g. the simplex-collapse test of Lagarias et al. 1998, eq. T1, in
/// [`SimplexTolerance`](crate::core::termination::SimplexTolerance)) can
/// bound on a richer view than [`State::param`] / [`State::cost`], which
/// only see the best vertex.
///
/// # Contract
///
/// - **Implementor must:** keep [`vertices`](Self::vertices) and
///   [`costs`](Self::costs) sorted by **ascending cost** at the start and
///   end of every [`Solver::next_iter`](crate::core::solver::Solver::next_iter)
///   call (and at the end of [`Solver::init`](crate::core::solver::Solver::init)).
///   So [`State::param`] / [`State::cost`] always return the current best
///   vertex (`vertices[0]` / `costs[0]`).
/// - **Implementor must:** sort `NaN` costs *last*, so a single bad
///   evaluation can't drag itself to the front and become the
///   "best" vertex.
/// - **Implementor must:** keep the two slices the same length and in
///   parallel order — `costs[i]` is the cost at `vertices[i]`.
pub trait SimplexState: State {
    /// All `n + 1` vertices, sorted by ascending cost with `NaN`-cost
    /// vertices last. Element `i` pairs with element `i` of
    /// [`costs`](Self::costs).
    fn vertices(&self) -> &[Self::Param];
    /// Costs in parallel with [`vertices`](Self::vertices), sorted
    /// ascending with `NaN` entries last. Always the same length as
    /// [`vertices`](Self::vertices).
    fn costs(&self) -> &[Self::Float];
}

/// States built around a population of `λ` candidate parameters and
/// parallel costs.
///
/// Mirrors [`SimplexState`]: the trait exists so termination criteria
/// that need to inspect the whole population (diversity, generation
/// spread, stall counters) can bound on a richer view than
/// [`State::param`] / [`State::cost`], which only see the best
/// candidate. This is the vehicle for stochastic solvers
/// ([`RandomSearch`](crate::solver::RandomSearch); CMA-ES once it lands).
///
/// # Contract
///
/// - **Implementor must:** keep [`candidates`](Self::candidates) and
///   [`costs`](Self::costs) sorted by **ascending cost** at the start
///   and end of every
///   [`Solver::next_iter`](crate::core::solver::Solver::next_iter)
///   call (and at the end of [`Solver::init`](crate::core::solver::Solver::init)).
///   So [`State::param`] / [`State::cost`] always return the current
///   best candidate (`candidates[0]` / `costs[0]`).
/// - **Implementor must:** sort `NaN` costs *last*, so a single bad
///   evaluation can't drag itself to the front and become the
///   "best" candidate.
/// - **Implementor must:** keep the two slices the same length and in
///   parallel order — `costs[i]` is the cost at `candidates[i]`.
pub trait PopulationState: State {
    /// All `λ` candidates, sorted by ascending cost with `NaN`-cost
    /// candidates last. Element `i` pairs with element `i` of
    /// [`costs`](Self::costs).
    fn candidates(&self) -> &[Self::Param];
    /// Costs in parallel with [`candidates`](Self::candidates), sorted
    /// ascending with `NaN` entries last. Always the same length as
    /// [`candidates`](Self::candidates).
    fn costs(&self) -> &[Self::Float];
}

/// State for solvers that follow a single iterate (gradient descent,
/// Gauss-Newton, …).
///
/// Stores the current `param`, an optional cached cost and gradient
/// (both `None` on a freshly built state), and the iteration /
/// evaluation counters reported through [`State`] and
/// [`GradientState`].
pub struct BasicState<P> {
    /// Current iterate.
    pub(crate) param: P,
    /// Cost cached for `param`; `None` until a solver stores one.
    pub(crate) cost: Option<f64>,
    /// Gradient cached for `param`; `None` until a solver stores one.
    pub(crate) gradient: Option<P>,
    /// Completed-iteration counter.
    pub(crate) iter: u64,
    /// Cost-function evaluation counter.
    pub(crate) cost_evals: u64,
    /// Gradient evaluation counter.
    pub(crate) gradient_evals: u64,
}

impl<P> BasicState<P> {
    /// Create a state positioned at the starting point `param`.
    ///
    /// All counters start at zero and the cost / gradient caches start
    /// empty; they are filled in by
    /// [`Solver::init`](crate::core::solver::Solver::init).
    pub fn new(param: P) -> Self {
        Self {
            iter: 0,
            cost_evals: 0,
            gradient_evals: 0,
            cost: None,
            gradient: None,
            param,
        }
    }
}

impl<P> State for BasicState<P> {
    type Param = P;
    type Float = f64;

    /// Number of completed iterations recorded so far.
    fn iter(&self) -> u64 {
        self.iter
    }

    /// Bumps the iteration counter by one.
    fn increment_iter(&mut self) {
        self.iter += 1;
    }

    /// Number of cost-function evaluations recorded so far.
    fn cost_evals(&self) -> u64 {
        self.cost_evals
    }

    /// Adds `by` to the cost-evaluation counter.
    fn increment_cost_evals(&mut self, by: u64) {
        self.cost_evals += by;
    }

    /// Borrows the current iterate.
    fn param(&self) -> &Self::Param {
        &self.param
    }

    /// Returns the cost cached for the current `param`.
    ///
    /// # Panics
    ///
    /// Panics when the cache is still empty, i.e. when called before
    /// [`Solver::init`](crate::core::solver::Solver::init) has stored a
    /// cost. By contract,
    /// [`Executor`](crate::core::executor::Executor) runs `init` before
    /// any termination-criterion check (the
    /// [`executor`](crate::core::executor) module docs give the full
    /// ordering), so reads from inside criteria and from
    /// [`OptimizationResult`](crate::core::executor::OptimizationResult)
    /// are safe.
    fn cost(&self) -> Self::Float {
        const NOT_INITIALISED: &str = "BasicState::cost read before Solver::init populated it";
        self.cost.expect(NOT_INITIALISED)
    }
}

impl<P> GradientState for BasicState<P> {
    /// Borrows the cached gradient, or `None` when none has been stored.
    fn gradient(&self) -> Option<&Self::Param> {
        self.gradient.as_ref()
    }

    /// Number of gradient evaluations recorded so far.
    fn gradient_evals(&self) -> u64 {
        self.gradient_evals
    }

    /// Adds `by` to the gradient-evaluation counter.
    fn increment_gradient_evals(&mut self, by: u64) {
        self.gradient_evals += by;
    }
}

/// Stock `SimplexState` implementation: the `n + 1` simplex vertices and
/// their costs held in two parallel `Vec`s. The solver keeps both
/// ordered by ascending cost at the start and end of every `next_iter`,
/// which is why `param()` / `cost()` can simply report entry `0` as the
/// current best vertex.
pub struct BasicSimplexState<V> {
    /// Simplex vertices; index `i` pairs with `costs[i]`.
    pub(crate) vertices: Vec<V>,
    /// Cost per vertex, parallel to `vertices`.
    pub(crate) costs: Vec<f64>,
    /// Completed-iteration counter.
    pub(crate) iter: u64,
    /// Cost-function evaluation counter.
    pub(crate) cost_evals: u64,
}

impl<V> BasicSimplexState<V> {
    /// Wrap an already-built simplex (advanced users / non-default
    /// initial geometries). When all you have is a starting point,
    /// prefer the backend-specific `BasicSimplexState::new`
    /// constructors.
    ///
    /// Every vertex starts with a placeholder cost of `f64::INFINITY`
    /// and both counters start at zero.
    ///
    /// # Panics
    ///
    /// Panics if fewer than two vertices are supplied.
    pub fn from_simplex(vertices: Vec<V>) -> Self {
        let vertex_count = vertices.len();
        assert!(
            vertex_count >= 2,
            "BasicSimplexState requires at least 2 vertices (n+1 for an n-D problem)"
        );
        // One placeholder cost per vertex keeps the two vectors parallel.
        let costs = vec![f64::INFINITY; vertex_count];
        Self {
            costs,
            vertices,
            iter: 0,
            cost_evals: 0,
        }
    }
}

/// FMINSEARCH/SciPy-style initial simplex from a single starting point.
///
/// Implemented once per backend (`Vec<f64>`, `nalgebra::DVector<f64>`, …)
/// so that a single `BasicSimplexState::new(x0)` constructor works the
/// same way everywhere. The default step is 5% on non-zero coordinates
/// and an absolute `0.00025` on zero coordinates.
pub trait IntoInitialSimplex<V> {
    /// Consume the starting point and build a simplex of `n + 1`
    /// vertices around it: vertex `0` is the starting point itself and
    /// vertex `i + 1` differs from it only in coordinate `i`, which is
    /// perturbed by `relative_step`.
    fn into_initial_simplex(self, relative_step: f64) -> Vec<V>;
}

impl IntoInitialSimplex<Vec<f64>> for Vec<f64> {
    /// Non-zero coordinates are scaled to `(1 + relative_step) * x_i`;
    /// zero coordinates are replaced by the absolute step `0.00025`.
    /// An empty starting point yields a single (empty) vertex.
    fn into_initial_simplex(self, relative_step: f64) -> Vec<Vec<f64>> {
        // Absolute offset used where a relative step would leave a zero
        // coordinate unchanged.
        const ZERO_COORDINATE_STEP: f64 = 0.00025;

        let dim = self.len();
        let mut simplex: Vec<Vec<f64>> = Vec::with_capacity(dim + 1);
        // The starting point is already owned, so it moves in as vertex 0
        // and every perturbed vertex is cloned from there.
        simplex.push(self);
        for axis in 0..dim {
            let mut vertex = simplex[0].clone();
            let coordinate = vertex[axis];
            vertex[axis] = if coordinate != 0.0 {
                (1.0 + relative_step) * coordinate
            } else {
                ZERO_COORDINATE_STEP
            };
            simplex.push(vertex);
        }
        simplex
    }
}

#[cfg(feature = "nalgebra")]
impl IntoInitialSimplex<nalgebra::DVector<f64>> for nalgebra::DVector<f64> {
    /// Non-zero coordinates are scaled to `(1 + relative_step) * x_i`;
    /// zero coordinates are replaced by the absolute step `0.00025`.
    fn into_initial_simplex(self, relative_step: f64) -> Vec<nalgebra::DVector<f64>> {
        // Absolute offset used where a relative step would leave a zero
        // coordinate unchanged.
        const ZERO_COORDINATE_STEP: f64 = 0.00025;

        let dim = self.len();
        let mut simplex: Vec<nalgebra::DVector<f64>> = Vec::with_capacity(dim + 1);
        // The starting point is already owned, so it moves in as vertex 0
        // and every perturbed vertex is cloned from there.
        simplex.push(self);
        for axis in 0..dim {
            let mut vertex = simplex[0].clone();
            let coordinate = vertex[axis];
            vertex[axis] = if coordinate != 0.0 {
                (1.0 + relative_step) * coordinate
            } else {
                ZERO_COORDINATE_STEP
            };
            simplex.push(vertex);
        }
        simplex
    }
}

#[cfg(feature = "faer")]
impl IntoInitialSimplex<faer::Col<f64>> for faer::Col<f64> {
    /// Non-zero coordinates are scaled to `(1 + relative_step) * x_i`;
    /// zero coordinates are replaced by the absolute step `0.00025`.
    fn into_initial_simplex(self, relative_step: f64) -> Vec<faer::Col<f64>> {
        // Absolute offset used where a relative step would leave a zero
        // coordinate unchanged.
        const ZERO_COORDINATE_STEP: f64 = 0.00025;

        let dim = self.nrows();
        let mut simplex: Vec<faer::Col<f64>> = Vec::with_capacity(dim + 1);
        // The starting point is already owned, so it moves in as vertex 0
        // and every perturbed vertex is cloned from there.
        simplex.push(self);
        for axis in 0..dim {
            let mut vertex = simplex[0].clone();
            let coordinate = vertex[axis];
            vertex[axis] = if coordinate != 0.0 {
                (1.0 + relative_step) * coordinate
            } else {
                ZERO_COORDINATE_STEP
            };
            simplex.push(vertex);
        }
        simplex
    }
}

#[cfg(feature = "ndarray")]
impl IntoInitialSimplex<ndarray::Array1<f64>> for ndarray::Array1<f64> {
    /// Non-zero coordinates are scaled to `(1 + relative_step) * x_i`;
    /// zero coordinates are replaced by the absolute step `0.00025`.
    fn into_initial_simplex(self, relative_step: f64) -> Vec<ndarray::Array1<f64>> {
        // Absolute offset used where a relative step would leave a zero
        // coordinate unchanged.
        const ZERO_COORDINATE_STEP: f64 = 0.00025;

        let dim = self.len();
        let mut simplex: Vec<ndarray::Array1<f64>> = Vec::with_capacity(dim + 1);
        // The starting point is already owned, so it moves in as vertex 0
        // and every perturbed vertex is cloned from there.
        simplex.push(self);
        for axis in 0..dim {
            let mut vertex = simplex[0].clone();
            let coordinate = vertex[axis];
            vertex[axis] = if coordinate != 0.0 {
                (1.0 + relative_step) * coordinate
            } else {
                ZERO_COORDINATE_STEP
            };
            simplex.push(vertex);
        }
        simplex
    }
}

impl<V> BasicSimplexState<V> {
    /// Build an FMINSEARCH/SciPy-style simplex around the starting point
    /// `x0` using the default relative step of `0.05`. Ergonomically
    /// this mirrors `BasicState::new` — the solver infers the dimension
    /// from the simplex during `init`.
    ///
    /// # Panics
    ///
    /// Panics (inside `from_simplex`) if the generated simplex has
    /// fewer than two vertices.
    pub fn new<X>(x0: X) -> Self
    where
        X: IntoInitialSimplex<V>,
    {
        Self::with_step(x0, 0.05)
    }

    /// Same as `new`, but with a caller-chosen relative step instead of
    /// the default `0.05`. Zero coordinates still use the FMINSEARCH
    /// absolute step `0.00025`.
    ///
    /// # Panics
    ///
    /// Panics (inside `from_simplex`) if the generated simplex has
    /// fewer than two vertices.
    pub fn with_step<X>(x0: X, relative_step: f64) -> Self
    where
        X: IntoInitialSimplex<V>,
    {
        let simplex = x0.into_initial_simplex(relative_step);
        Self::from_simplex(simplex)
    }
}

impl<V> State for BasicSimplexState<V> {
    type Param = V;
    type Float = f64;

    /// Number of completed iterations recorded so far.
    fn iter(&self) -> u64 {
        self.iter
    }

    /// Bumps the iteration counter by one.
    fn increment_iter(&mut self) {
        self.iter += 1;
    }

    /// Number of cost-function evaluations recorded so far.
    fn cost_evals(&self) -> u64 {
        self.cost_evals
    }

    /// Adds `by` to the cost-evaluation counter.
    fn increment_cost_evals(&mut self, by: u64) {
        self.cost_evals += by;
    }

    /// Borrows the first vertex, which is the best one as long as the
    /// simplex is kept sorted by ascending cost.
    fn param(&self) -> &Self::Param {
        &self.vertices[0]
    }

    /// Returns the first cost entry, i.e. the cost paired with
    /// [`param`](State::param).
    fn cost(&self) -> Self::Float {
        self.costs[0]
    }
}

impl<V> SimplexState for BasicSimplexState<V> {
    /// Borrows every stored vertex as a slice.
    fn vertices(&self) -> &[Self::Param] {
        self.vertices.as_slice()
    }

    /// Borrows every stored cost as a slice, parallel to
    /// [`vertices`](SimplexState::vertices).
    fn costs(&self) -> &[Self::Float] {
        self.costs.as_slice()
    }
}

/// State for quasi-Newton solvers that maintain a dense inverse-Hessian
/// approximation `H ≈ ∇²f(x)⁻¹` (BFGS, DFP, SR1).
///
/// Hardcoded to nalgebra dynamic vectors/matrices for v1 (BFGS only
/// supports the nalgebra backend — see `solver::bfgs` doc). L-BFGS does
/// not reuse this dense shape: it has its own state, [`LbfgsState`],
/// defined in the [`lbfgs`] submodule.
///
/// `initial_scaling_done` records whether the standard
/// `H₀ ← (sᵀy / yᵀy)·I` rescaling has been applied after the first
/// accepted step (Nocedal & Wright (6.20)). That rescaling keeps the
/// unit step well-scaled on poorly conditioned problems where plain
/// identity initialization stalls.
#[cfg(feature = "nalgebra")]
pub struct QuasiNewtonState<V, M> {
    /// Current iterate.
    pub(crate) param: V,
    /// Cost cached for `param`; `None` until a solver stores one.
    pub(crate) cost: Option<f64>,
    /// Gradient cached for `param`; `None` until a solver stores one.
    pub(crate) gradient: Option<V>,
    /// Dense inverse-Hessian approximation `H`.
    pub(crate) inverse_hessian: M,
    /// Whether the one-off `H₀` rescaling has already been applied.
    pub(crate) initial_scaling_done: bool,
    /// Completed-iteration counter.
    pub(crate) iter: u64,
    /// Cost-function evaluation counter.
    pub(crate) cost_evals: u64,
    /// Gradient evaluation counter.
    pub(crate) gradient_evals: u64,
}

#[cfg(feature = "nalgebra")]
impl QuasiNewtonState<nalgebra::DVector<f64>, nalgebra::DMatrix<f64>> {
    /// Create a state at the starting point `param`.
    ///
    /// The inverse-Hessian approximation starts as the `n × n` identity
    /// (with `n = param.len()`), the cost / gradient caches start empty,
    /// the rescaling flag starts `false`, and all counters start at zero.
    pub fn new(param: nalgebra::DVector<f64>) -> Self {
        let dim = param.len();
        let inverse_hessian = nalgebra::DMatrix::identity(dim, dim);
        Self {
            inverse_hessian,
            initial_scaling_done: false,
            iter: 0,
            cost_evals: 0,
            gradient_evals: 0,
            cost: None,
            gradient: None,
            param,
        }
    }
}

#[cfg(feature = "nalgebra")]
impl<V, M> State for QuasiNewtonState<V, M> {
    type Param = V;
    type Float = f64;

    /// Number of completed iterations recorded so far.
    fn iter(&self) -> u64 {
        self.iter
    }

    /// Bumps the iteration counter by one.
    fn increment_iter(&mut self) {
        self.iter += 1;
    }

    /// Number of cost-function evaluations recorded so far.
    fn cost_evals(&self) -> u64 {
        self.cost_evals
    }

    /// Adds `by` to the cost-evaluation counter.
    fn increment_cost_evals(&mut self, by: u64) {
        self.cost_evals += by;
    }

    /// Borrows the current iterate.
    fn param(&self) -> &Self::Param {
        &self.param
    }

    /// Returns the cost cached for the current `param`.
    ///
    /// # Panics
    ///
    /// Panics when the cache is still empty, i.e. when called before
    /// [`Solver::init`](crate::core::solver::Solver::init) has stored a
    /// cost. The contract is the same as for [`BasicState::cost`]; see
    /// there for the full safety argument.
    fn cost(&self) -> Self::Float {
        const NOT_INITIALISED: &str =
            "QuasiNewtonState::cost read before Solver::init populated it";
        self.cost.expect(NOT_INITIALISED)
    }
}

#[cfg(feature = "nalgebra")]
impl<V, M> GradientState for QuasiNewtonState<V, M> {
    /// Borrows the cached gradient, or `None` when none has been stored.
    fn gradient(&self) -> Option<&Self::Param> {
        self.gradient.as_ref()
    }

    /// Number of gradient evaluations recorded so far.
    fn gradient_evals(&self) -> u64 {
        self.gradient_evals
    }

    /// Adds `by` to the gradient-evaluation counter.
    fn increment_gradient_evals(&mut self, by: u64) {
        self.gradient_evals += by;
    }
}

/// Stock [`PopulationState`] implementation: `λ` candidate parameters
/// with their costs in a parallel vector. The solver keeps both ordered
/// by ascending cost at the start and end of every `next_iter`, which
/// is why [`State::param`] / [`State::cost`] can report entry `0` as
/// the current best candidate.
///
/// Vehicle for [`RandomSearch`](crate::solver::RandomSearch); will be
/// reused by CMA-ES (S8) without changes.
pub struct BasicPopulationState<V> {
    /// Candidate parameters; index `i` pairs with `costs[i]`.
    pub(crate) candidates: Vec<V>,
    /// Cost per candidate, parallel to `candidates`.
    pub(crate) costs: Vec<f64>,
    /// Completed-iteration counter.
    pub(crate) iter: u64,
    /// Cost-function evaluation counter.
    pub(crate) cost_evals: u64,
}

impl<V> BasicPopulationState<V> {
    /// Wrap a caller-supplied population (advanced users; custom
    /// initial distributions). Every candidate starts with a
    /// placeholder cost of `f64::INFINITY`; real costs are filled by the
    /// solver in [`Solver::init`](crate::core::solver::Solver::init).
    ///
    /// # Panics
    ///
    /// Panics if `candidates` is empty — a population must have at
    /// least one member.
    pub fn from_population(candidates: Vec<V>) -> Self {
        let population_size = candidates.len();
        assert!(
            population_size > 0,
            "BasicPopulationState requires a non-empty population"
        );
        Self {
            // One placeholder cost per candidate keeps the vectors parallel.
            costs: vec![f64::INFINITY; population_size],
            candidates,
            iter: 0,
            cost_evals: 0,
        }
    }

    /// Empty container with room reserved for `lambda` candidates and
    /// costs. The solver fills it in
    /// [`Solver::init`](crate::core::solver::Solver::init) (e.g. by
    /// sampling uniformly in the problem's box).
    ///
    /// Pick this constructor when the *solver* owns the initial
    /// population distribution (the random-search style); pick
    /// [`from_population`](Self::from_population) when the *caller*
    /// owns it.
    ///
    /// # Panics
    ///
    /// Panics if `lambda == 0`.
    pub fn with_size(lambda: usize) -> Self {
        assert!(lambda > 0, "BasicPopulationState requires lambda >= 1");
        let candidates = Vec::with_capacity(lambda);
        let costs = Vec::with_capacity(lambda);
        Self {
            candidates,
            costs,
            iter: 0,
            cost_evals: 0,
        }
    }
}

impl<V> State for BasicPopulationState<V> {
    type Param = V;
    type Float = f64;

    /// Number of completed iterations recorded so far.
    fn iter(&self) -> u64 {
        self.iter
    }

    /// Bumps the iteration counter by one.
    fn increment_iter(&mut self) {
        self.iter += 1;
    }

    /// Number of cost-function evaluations recorded so far.
    fn cost_evals(&self) -> u64 {
        self.cost_evals
    }

    /// Adds `by` to the cost-evaluation counter.
    fn increment_cost_evals(&mut self, by: u64) {
        self.cost_evals += by;
    }

    /// Borrows the first candidate, which is the best one as long as
    /// the population is kept sorted by ascending cost.
    ///
    /// # Panics
    ///
    /// Panics if the population is still empty — e.g. a state built
    /// with `with_size` that
    /// [`Solver::init`](crate::core::solver::Solver::init) has not
    /// filled yet.
    fn param(&self) -> &V {
        // `first()` + `expect` rather than `[0]` so an empty population
        // reports what went wrong instead of a bare index-out-of-bounds.
        self.candidates
            .first()
            .expect("BasicPopulationState::param read before Solver::init populated it")
    }

    /// Returns the first cost entry, i.e. the cost paired with
    /// [`param`](State::param).
    ///
    /// # Panics
    ///
    /// Panics if the cost vector is still empty — e.g. a state built
    /// with `with_size` that
    /// [`Solver::init`](crate::core::solver::Solver::init) has not
    /// filled yet.
    fn cost(&self) -> f64 {
        *self
            .costs
            .first()
            .expect("BasicPopulationState::cost read before Solver::init populated it")
    }
}

impl<V> PopulationState for BasicPopulationState<V> {
    /// Borrows every stored candidate as a slice.
    fn candidates(&self) -> &[Self::Param] {
        self.candidates.as_slice()
    }

    /// Borrows every stored cost as a slice, parallel to
    /// [`candidates`](PopulationState::candidates).
    fn costs(&self) -> &[Self::Float] {
        self.costs.as_slice()
    }
}