basin 0.1.0 - Docs.rs

// Index-based loops and many-arg helpers mirror the Fortran source for
// parity. Both lints are blanket-allowed for this module.
#![allow(clippy::needless_range_loop, clippy::too_many_arguments)]

//! L-BFGS-B solver.
//!
//! Faithful port of Nocedal–Zhu's L-BFGS-B v3.0 Fortran source
//! (`references/lbfgsb-v3.0/`). The Fortran subroutines map to
//! submodules below, and the top-level [`LBFGSB`] solver mirrors the
//! `mainlb` iteration loop (with the goto-style coroutine flattened
//! to a Rust `loop`):
//!
//! - [`cauchy`] — generalized Cauchy point along the projected gradient
//!   path. Port of `cauchy.f`.
//! - [`subsm`] — subspace minimization with `iword == 1` bound-
//!   backtracking (v3.0 deviation). Port of `subsm.f`.
//! - [`formk`] — `L·E·Lᵀ` factorization of the indefinite middle
//!   matrix `K`. Port of `formk.f`.
//! - [`compact`] — compact-form helpers (`formt`, `bmv`, pure-Rust
//!   Cholesky and triangular solves).
//! - [`backend`] — the [`backend::AsFloatSliceMut`] trait that lets
//!   the slice-based numerics work generically over `Vec<f64>`,
//!   `nalgebra::DVector<f64>`, and `faer::Col<f64>`.

pub(crate) mod backend;
pub(crate) mod cauchy;
pub(crate) mod compact;
pub(crate) mod formk;
pub(crate) mod subsm;

use crate::core::constraint::BoxConstrained;
use crate::core::math::{Dot, ScaledAdd};
use crate::core::problem::{CostFunction, Gradient};
use crate::core::solver::Solver;
use crate::core::state::lbfgs::{LbfgsState, LbfgsbWork};
use crate::core::termination::TerminationReason;
use crate::line_search::{LineSearch, MoreThuente};

use self::backend::{AsFloatSlice, AsFloatSliceMut};
use self::cauchy::{cauchy, iwhere as iwh};
use self::compact::{bmv, formt};
use self::formk::formk;
use self::subsm::subsm;

/// L-BFGS-B (limited-memory BFGS with box constraints).
///
/// Byrd–Lu–Nocedal 1995 / Zhu–Byrd–Lu–Nocedal 1997 (ACM TOMS Alg. 778),
/// with the Nocedal–Morales 2011 v3.0 directional-derivative + bound-
/// backtracking deviation in subspace minimization. Faithful port of
/// the Fortran v3.0 reference at `references/lbfgsb-v3.0/`.
///
/// At each iteration L-BFGS-B:
///
/// 1. Walks the projected gradient ray, building a piecewise-quadratic
///    model and identifying the **generalized Cauchy point** `xcp` —
///    the minimizer along the path (see [`cauchy`]).
/// 2. Restricts to the free variables at `xcp` and computes an
///    approximate **subspace minimizer** via a structured Newton step
///    against the limited-memory compact-form Hessian (see [`subsm`]).
///    If the projected Newton step is infeasible, a uniform-α
///    bound-backtracking branch fires.
/// 3. Performs a **Moré–Thuente line search** along `d = z − x`,
///    safeguarded so the step is feasible.
/// 4. Accepts the step, updates the limited-memory `(s, y)` history
///    when the curvature condition holds, and rebuilds the
///    compact-form middle matrix `T` via Cholesky factorization
///    (see [`formt`](compact::formt)).
///
/// On a singular middle-matrix or non-positive-definite `T`, the
/// solver clears the history and retries the iteration (Fortran's
/// `goto 222` reset path). One retry is enough — after clearing,
/// `col = 0` falls through to the line-search-only path, which
/// either succeeds with the projected steepest-descent direction or
/// fails the whole solve.
///
/// # Memory parameter
///
/// The history capacity `m` lives on [`LbfgsState`]:
/// `LbfgsState::new(x0, m)`. Fortran recommends `m ∈ [3, 20]`;
/// `m = 10` is a reasonable default.
///
/// # Termination
///
/// No solver-internal optimality test; the canonical first-order
/// metric is the framework-level
/// [`ProjectedGradientTolerance`](crate::core::termination::ProjectedGradientTolerance),
/// which captures bounds at construction. Pair with
/// [`MaxIter`](crate::core::termination::MaxIter),
/// [`MaxCostEvals`](crate::core::termination::MaxCostEvals), and
/// [`CostTolerance`](crate::core::termination::CostTolerance) as
/// desired.
///
/// # Backends
///
/// Generic over any parameter type implementing
/// [`backend::AsFloatSliceMut`] + [`Clone`] + [`Dot`] +
/// [`ScaledAdd<f64>`]. Built-in impls cover `Vec<f64>`,
/// `nalgebra::DVector<f64>` (feature `nalgebra`), and `faer::Col<f64>`
/// (feature `faer`). Other backends can implement the trait if their
/// storage is contiguous.
pub struct LBFGSB<S = MoreThuente> {
    line_search: S,
    /// Fortran `dr ≤ epsmch · ddum` curvature-skip threshold
    /// (`lbfgsb.f:875`). Defaults to `f64::EPSILON`.
    epsilon: f64,
    /// Built-in projected-gradient convergence tolerance. Emits
    /// [`TerminationReason::SolverConverged`] at the top of an
    /// iteration when `‖projgr(x, g, l, u)‖_∞ ≤ tol_pg`. Default
    /// `1e-10`. Set to `0.0` to disable (matches Fortran `pgtol = 0`
    /// — required for the iteration-wise parity test against the
    /// reference, which doesn't terminate on the projected gradient).
    tol_pg: f64,
    /// Default limited-memory history capacity (Fortran `m`,
    /// `references/lbfgsb-v3.0/`). Default `10`. Only consulted when
    /// the solver constructs the state itself — e.g. as a
    /// [`MemeticInner`](crate::solver::MemeticInner) seeding a fresh
    /// [`LbfgsState`] for a CMA-ES injection refinement. Standalone
    /// users supply `m_capacity` directly to `LbfgsState::new(x, m)`,
    /// in which case `LBFGSB::init` reads it off the state and this
    /// field is unused.
    ///
    /// `pub(crate)` so the `MemeticInner` impl in
    /// `solver/cma_inject.rs` can read it.
    pub(crate) m_capacity: usize,
}

impl Default for LBFGSB<MoreThuente> {
    fn default() -> Self {
        Self::new()
    }
}

impl LBFGSB<MoreThuente> {
    /// L-BFGS-B with Moré–Thuente line search and Fortran v3.0
    /// defaults (`ftol = 1e-3`, `gtol = 0.9`, `xtol = 0.1`). Built-in
    /// projected-gradient tolerance is `1e-10`; the seed history
    /// capacity (used only when the solver constructs the state, e.g.
    /// for memetic injection) is `10`.
    pub fn new() -> Self {
        Self {
            line_search: MoreThuente::new(),
            epsilon: f64::EPSILON,
            tol_pg: 1e-10,
            m_capacity: 10,
        }
    }
}

impl<S> LBFGSB<S> {
    /// L-BFGS-B with an explicit line-search strategy. Note: using
    /// anything other than [`MoreThuente`] forfeits iteration-wise
    /// parity with the Fortran reference.
    pub fn with_line_search(line_search: S) -> Self {
        Self {
            line_search,
            epsilon: f64::EPSILON,
            tol_pg: 1e-10,
            m_capacity: 10,
        }
    }

    /// Override the curvature-skip threshold. Default `f64::EPSILON`,
    /// matching Fortran's `dr ≤ epsmch · ddum` test.
    pub fn epsilon(mut self, epsilon: f64) -> Self {
        assert!(epsilon >= 0.0, "epsilon must be ≥ 0");
        self.epsilon = epsilon;
        self
    }

    /// Override the built-in projected-gradient convergence tolerance.
    /// Default `1e-10`; pass `0.0` to disable (Fortran-`pgtol=0`
    /// semantics, used by the iteration-wise parity test).
    pub fn tol_pg(mut self, tol_pg: f64) -> Self {
        assert!(tol_pg >= 0.0, "tol_pg must be ≥ 0");
        self.tol_pg = tol_pg;
        self
    }

    /// Override the default limited-memory history capacity used when
    /// the solver constructs its own [`LbfgsState`] (memetic seeding).
    /// Standalone usage that hands in a state via `LbfgsState::new(x, m)`
    /// is unaffected. Default `10`; Nocedal recommends `[3, 20]`.
    ///
    /// # Panics
    ///
    /// Panics if `m_capacity == 0`.
    pub fn m_capacity(mut self, m_capacity: usize) -> Self {
        assert!(m_capacity >= 1, "m_capacity must be ≥ 1");
        self.m_capacity = m_capacity;
        self
    }
}

impl<P, V, S> Solver<P, LbfgsState<V>> for LBFGSB<S>
where
    P: CostFunction<Param = V, Output = f64> + Gradient<Param = V, Gradient = V> + BoxConstrained,
    V: AsFloatSliceMut + Clone + Dot + ScaledAdd<f64>,
    S: LineSearch<P, V>,
{
    fn init(&mut self, problem: &P, mut state: LbfgsState<V>) -> LbfgsState<V> {
        let n = state.param.as_float_slice().len();
        let m = state.m_capacity;
        let mut work = LbfgsbWork::new(n, m);

        // Project the initial iterate onto the feasible box and
        // initialise `iwhere`, `cnstnd`, `boxed` (Fortran `active`,
        // `lbfgsb.f:1004`).
        active_init(
            state.param.as_float_slice_mut(),
            problem.lower().as_float_slice(),
            problem.upper().as_float_slice(),
            &mut work.iwhere,
            &mut work.cnstnd,
            &mut work.boxed,
        );

        state.cost = Some(problem.cost(&state.param));
        state.gradient = Some(problem.gradient(&state.param));
        state.cost_evals += 1;
        state.gradient_evals += 1;
        state.work = Some(work);
        state
    }

    fn next_iter(
        &mut self,
        problem: &P,
        mut state: LbfgsState<V>,
    ) -> (LbfgsState<V>, Option<TerminationReason>) {
        // Take the gradient and cost cached at the current `param`;
        // restore them on early exits.
        let g_v = state
            .gradient
            .take()
            .expect("gradient not set: Solver::init must run before next_iter");
        let f_old = state
            .cost
            .expect("cost not set: Solver::init must run before next_iter");

        let n = state.param.as_float_slice().len();
        let m = state.m_capacity;

        // Inner restart loop — Fortran's `goto 222` path. At most one
        // restart per iteration: after clearing history we either
        // succeed with the (col == 0) line-search-only path or bail.
        let mut restart_budget = 1u8;

        loop {
            let work = state.work.as_mut().expect("work missing");

            // -------------------------------------------------------
            // Phase A — projected gradient norm. Drives both the
            // built-in convergence check below and the cauchy
            // short-circuit further down. The framework-side
            // `ProjectedGradientTolerance` criterion does the same
            // calculation, but checking it inline here lets a memetic
            // wrapper (CMA-ES injection, `BoundedCmaInject`) skip
            // having to register an external criterion against bounds
            // it can't see at solver-build time.
            // -------------------------------------------------------
            let sbgnrm = projected_gradient_norm(
                state.param.as_float_slice(),
                g_v.as_float_slice(),
                problem.lower().as_float_slice(),
                problem.upper().as_float_slice(),
            );

            // Built-in convergence: emit `SolverConverged` when the
            // projected-gradient infinity-norm sits at the tolerance.
            // Restore the borrowed cost/gradient so callers reading
            // `state.gradient()` / `state.cost()` on the final result
            // see the values at the converged iterate. Set
            // `tol_pg = 0.0` (Fortran `pgtol = 0`) to disable.
            if sbgnrm <= self.tol_pg {
                state.gradient = Some(g_v);
                state.cost = Some(f_old);
                return (state, Some(TerminationReason::SolverConverged));
            }

            let col = state.ws.len();
            let theta = state.theta;
            let cnstnd = work.cnstnd;
            let boxed = work.boxed;
            let updatd = work.updatd;

            // -------------------------------------------------------
            // Phase B — generalized Cauchy point (or skip when no
            // bounds are active and we already have history).
            // -------------------------------------------------------
            let mut wrk = updatd;
            if !cnstnd && col > 0 {
                // Unbounded with history: skip GCP, set z := x.
                work.z.copy_from_slice(state.param.as_float_slice());
            } else {
                let ws_cols: Vec<&[f64]> = state.ws.iter().map(|v| v.as_float_slice()).collect();
                let wy_cols: Vec<&[f64]> = state.wy.iter().map(|v| v.as_float_slice()).collect();
                let cauchy_res = cauchy(
                    state.param.as_float_slice(),
                    problem.lower().as_float_slice(),
                    problem.upper().as_float_slice(),
                    g_v.as_float_slice(),
                    &ws_cols,
                    &wy_cols,
                    &state.sy,
                    &work.wt,
                    m,
                    theta,
                    sbgnrm,
                    &mut work.z,
                    &mut work.d,
                    &mut work.t_buf,
                    &mut work.iwhere,
                    &mut work.indx2,
                    &mut work.wa_p,
                    &mut work.wa_c,
                    &mut work.wa_wbp,
                    &mut work.wa_v,
                );
                if cauchy_res.is_err() {
                    if try_restart(&mut state, &g_v, f_old, &mut restart_budget) {
                        continue;
                    } else {
                        return (state, Some(TerminationReason::SolverFailed));
                    }
                }
            }

            // -------------------------------------------------------
            // Phase C — free / active partition (Fortran `freev`).
            // -------------------------------------------------------
            let (nfree, nenter, ileave) = freev(
                n,
                &work.iwhere,
                &mut work.index,
                &mut work.indx2,
                work.nfree,
                state.iter,
                cnstnd,
            );
            work.nfree = nfree;
            let wrk_local = (ileave < n) || (nenter > 0) || wrk;
            wrk = wrk_local;

            // -------------------------------------------------------
            // Phase D — subspace minimization (when there are free
            // variables and history to use).
            // -------------------------------------------------------
            if nfree > 0 && col > 0 {
                if wrk {
                    let ws_cols: Vec<&[f64]> =
                        state.ws.iter().map(|v| v.as_float_slice()).collect();
                    let wy_cols: Vec<&[f64]> =
                        state.wy.iter().map(|v| v.as_float_slice()).collect();
                    if formk(
                        &mut work.wn,
                        &mut work.wn1,
                        m,
                        col,
                        theta,
                        &state.sy,
                        &ws_cols,
                        &wy_cols,
                        nfree,
                        &work.index,
                        nenter,
                        ileave,
                        &work.indx2,
                        work.iupdat,
                        updatd,
                    )
                    .is_err()
                    {
                        if try_restart(&mut state, &g_v, f_old, &mut restart_budget) {
                            continue;
                        } else {
                            return (state, Some(TerminationReason::SolverFailed));
                        }
                    }
                }

                // cmprlb: r = −Z'B(z − x) − Z'g, indexed by the free
                // subspace.
                if cmprlb(
                    state.param.as_float_slice(),
                    g_v.as_float_slice(),
                    &work.z,
                    &mut work.r,
                    &mut work.wa_c,
                    &mut work.wa_p,
                    &state.sy,
                    &work.wt,
                    &state.ws,
                    &state.wy,
                    &work.index,
                    nfree,
                    cnstnd,
                    col,
                    theta,
                    m,
                )
                .is_err()
                {
                    if try_restart(&mut state, &g_v, f_old, &mut restart_budget) {
                        continue;
                    } else {
                        return (state, Some(TerminationReason::SolverFailed));
                    }
                }

                // subsm: writes the subspace minimizer into `z`.
                let ws_cols: Vec<&[f64]> = state.ws.iter().map(|v| v.as_float_slice()).collect();
                let wy_cols: Vec<&[f64]> = state.wy.iter().map(|v| v.as_float_slice()).collect();
                let subsm_res = subsm(
                    &mut work.z,
                    &mut work.r,
                    &mut work.xp,
                    state.param.as_float_slice(),
                    g_v.as_float_slice(),
                    &work.index[0..nfree],
                    problem.lower().as_float_slice(),
                    problem.upper().as_float_slice(),
                    &ws_cols,
                    &wy_cols,
                    &work.wn,
                    &mut work.wa_v,
                    m,
                    col,
                    theta,
                );
                if subsm_res.is_err() {
                    if try_restart(&mut state, &g_v, f_old, &mut restart_budget) {
                        continue;
                    } else {
                        return (state, Some(TerminationReason::SolverFailed));
                    }
                }
                // We don't read `SubsmStatus` — the projected step
                // is already applied to `work.z`, and the
                // line-search step cap below re-enforces feasibility.
            }

            // -------------------------------------------------------
            // Phase E — line search. Direction d = z − x.
            // -------------------------------------------------------
            for i in 0..n {
                work.d[i] = work.z[i] - state.param.as_float_slice()[i];
            }
            let dtd: f64 = work.d.iter().map(|x| x * x).sum();
            let dnorm = dtd.sqrt();
            work.dnorm = dnorm;

            // Maximum feasible step (Fortran lnsrlb `stpmx`).
            let stpmx = if cnstnd {
                if state.iter == 0 {
                    1.0
                } else {
                    feasible_step_cap(
                        state.param.as_float_slice(),
                        problem.lower().as_float_slice(),
                        problem.upper().as_float_slice(),
                        &work.d,
                    )
                }
            } else {
                1.0e10
            };

            let alpha_init = if state.iter == 0 && !boxed {
                (1.0_f64 / dnorm).min(stpmx)
            } else {
                1.0
            };

            // Build a V-typed direction for the line search.
            let mut d_v = state.param.clone();
            d_v.as_float_slice_mut().copy_from_slice(&work.d);

            // Save previous iterate before stepping (Fortran `t = x;
            // r = g`).
            work.t_buf.copy_from_slice(state.param.as_float_slice());
            work.r.copy_from_slice(g_v.as_float_slice());
            work.gdold = work
                .d
                .iter()
                .zip(g_v.as_float_slice())
                .map(|(a, b)| a * b)
                .sum();

            // Drive the line search. Fortran `lnsrlb` sets the
            // initial trial step and the feasibility cap on
            // `dcsrch`'s `stp` / `stpmax`; we don't have a generic
            // hook for that on [`LineSearch`], so for now we let the
            // configured line search keep its own initial / max-step
            // settings. Parity holds on the Rosenbrock 5D fixture
            // because the natural Newton step stays interior, but
            // tight-bound problems may need a constraint-aware
            // wrapper down the road.
            let _ = (alpha_init, stpmx);
            let ls_result = self
                .line_search
                .next(problem, &state.param, f_old, &g_v, &d_v);
            state.cost_evals += ls_result.cost_evals;
            state.gradient_evals += ls_result.gradient_evals;

            let stp = ls_result.alpha;
            if !(stp.is_finite() && stp > 0.0) {
                // Line search bailed. If col == 0, abnormal
                // termination — there's no compact-form state to
                // reset. Otherwise restart with cleared history. The
                // clone of `g_v` is fine: we're on the cold-path exit
                // either way.
                state.gradient = Some(g_v.clone());
                state.cost = Some(f_old);
                if state.ws.is_empty() {
                    return (state, Some(TerminationReason::SolverFailed));
                }
                if try_restart_after_lnsrch(&mut state, &mut restart_budget) {
                    continue;
                } else {
                    return (state, Some(TerminationReason::SolverFailed));
                }
            }

            // Apply the step. x ← x + stp · d.
            state.param.scaled_add(stp, &d_v);

            // Recompute f, g at the new iterate. (MoreThuente
            // discards the final trial's values; cleanest workaround
            // is one extra cost+grad eval per iter.)
            let f_new = problem.cost(&state.param);
            let g_new = problem.gradient(&state.param);
            state.cost_evals += 1;
            state.gradient_evals += 1;

            // -------------------------------------------------------
            // Phase F — limited-memory update. Curvature check
            // matches Fortran's `dr ≤ epsmch · ddum`.
            // -------------------------------------------------------
            // s = stp · d  (in slice form, d holds the unscaled
            // direction; the s vector lives in `d` scaled by stp).
            // y = g_new − g_old.
            // dr = y · s, ddum = −gdold · stp (Fortran convention).
            let work = state.work.as_mut().unwrap();
            let g_new_slice = g_new.as_float_slice();
            let g_old_slice = work.r.as_slice(); // saved earlier

            let mut dr = 0.0;
            for i in 0..n {
                let yi = g_new_slice[i] - g_old_slice[i];
                let si = stp * work.d[i];
                dr += yi * si;
            }
            let ddum = -work.gdold * stp;

            if dr > self.epsilon * ddum.abs() {
                // Accept the (s, y) pair. Build s, y as V then push.
                let mut s_v = state.param.clone();
                let s_slice = s_v.as_float_slice_mut();
                for i in 0..n {
                    s_slice[i] = stp * work.d[i];
                }
                let mut y_v = g_new.clone();
                let y_slice = y_v.as_float_slice_mut();
                for i in 0..n {
                    y_slice[i] = g_new_slice[i] - g_old_slice[i];
                }
                let appended = state.append_pair(s_v, y_v);
                if appended {
                    let work = state.work.as_mut().unwrap();
                    work.updatd = true;
                    work.iupdat = work.iupdat.saturating_add(1);

                    // Rebuild T = θ SᵀS + L D⁻¹ Lᵀ. On failure, reset.
                    let new_col = state.ws.len();
                    if formt(state.theta, &state.sy, &state.ss, new_col, m, &mut work.wt).is_err() {
                        // Reset history; the next iter starts fresh.
                        state.ws.clear();
                        state.wy.clear();
                        for v in state.sy.iter_mut() {
                            *v = 0.0;
                        }
                        for v in state.ss.iter_mut() {
                            *v = 0.0;
                        }
                        state.theta = 1.0;
                        work.reset_history();
                    }
                } else {
                    // append_pair refused (s·y ≤ 0 numerically) —
                    // treat as a skipped update.
                    let work = state.work.as_mut().unwrap();
                    work.updatd = false;
                }
            } else {
                // Skip the update; matches Fortran's `nskip += 1` path.
                let work = state.work.as_mut().unwrap();
                work.updatd = false;
            }

            state.cost = Some(f_new);
            state.gradient = Some(g_new);
            return (state, None);
        }
    }
}

/// Project a candidate `x` into `[l, u]` and initialize `iwhere`
/// per Fortran `active` (`lbfgsb.f:1004`).
fn active_init(
    x: &mut [f64],
    l: &[f64],
    u: &[f64],
    iwhere: &mut [i8],
    cnstnd: &mut bool,
    boxed: &mut bool,
) {
    let n = x.len();
    *cnstnd = false;
    *boxed = true;
    for i in 0..n {
        let lo = l[i];
        let hi = u[i];
        let lower_finite = lo.is_finite();
        let upper_finite = hi.is_finite();
        if lower_finite && x[i] < lo {
            x[i] = lo;
        }
        if upper_finite && x[i] > hi {
            x[i] = hi;
        }
        if !(lower_finite && upper_finite) {
            *boxed = false;
        }
        if !lower_finite && !upper_finite {
            iwhere[i] = iwh::ALWAYS_FREE;
        } else {
            *cnstnd = true;
            if lower_finite && upper_finite && lo == hi {
                iwhere[i] = iwh::ALWAYS_FIXED;
            } else {
                iwhere[i] = iwh::FREE_MOVED;
            }
        }
    }
}

/// Infinity-norm of the projected gradient (Fortran `projgr`,
/// `lbfgsb.f:2942`).
fn projected_gradient_norm(x: &[f64], g: &[f64], l: &[f64], u: &[f64]) -> f64 {
    let mut sbgnrm = 0.0_f64;
    for i in 0..x.len() {
        let mut gi = g[i];
        let lower_finite = l[i].is_finite();
        let upper_finite = u[i].is_finite();
        if lower_finite || upper_finite {
            if gi < 0.0 {
                if upper_finite {
                    gi = (x[i] - u[i]).max(gi);
                }
            } else if lower_finite {
                gi = (x[i] - l[i]).min(gi);
            }
        }
        sbgnrm = sbgnrm.max(gi.abs());
    }
    sbgnrm
}

/// Largest feasible step `stpmx` along `d` such that `x + stpmx · d`
/// stays inside `[l, u]`. Fortran `lnsrlb` initial-step calculation
/// (`lbfgsb.f:2511-2530`).
fn feasible_step_cap(x: &[f64], l: &[f64], u: &[f64], d: &[f64]) -> f64 {
    let mut stpmx = 1.0e10_f64;
    for i in 0..x.len() {
        let di = d[i];
        let lower_finite = l[i].is_finite();
        let upper_finite = u[i].is_finite();
        if di < 0.0 && lower_finite {
            let gap = l[i] - x[i];
            if gap >= 0.0 {
                stpmx = 0.0;
            } else if di * stpmx < gap {
                stpmx = gap / di;
            }
        } else if di > 0.0 && upper_finite {
            let gap = u[i] - x[i];
            if gap <= 0.0 {
                stpmx = 0.0;
            } else if di * stpmx > gap {
                stpmx = gap / di;
            }
        }
    }
    stpmx
}

/// Count entering / leaving variables and rebuild the free + active
/// partition in `index`. Port of Fortran `freev` (`lbfgsb.f:2241`).
///
/// Returns `(nfree, nenter, ileave)` where `indx2[0..nenter]` holds
/// the entering variables and `indx2[ileave..n]` the leaving ones.
fn freev(
    n: usize,
    iwhere: &[i8],
    index: &mut [usize],
    indx2: &mut [usize],
    prev_nfree: usize,
    iter: u64,
    cnstnd: bool,
) -> (usize, usize, usize) {
    let mut nenter = 0usize;
    let mut ileave = n;
    if iter > 0 && cnstnd {
        // Variables that were free, now active ⇒ leaving.
        for i in 0..prev_nfree {
            let k = index[i];
            if iwhere[k] > 0 {
                ileave -= 1;
                indx2[ileave] = k;
            }
        }
        // Variables that were active, now free ⇒ entering.
        for i in prev_nfree..n {
            let k = index[i];
            if iwhere[k] <= 0 {
                indx2[nenter] = k;
                nenter += 1;
            }
        }
    }
    // Rebuild free + active partition.
    let mut nfree = 0usize;
    let mut iact = n;
    for i in 0..n {
        if iwhere[i] <= 0 {
            index[nfree] = i;
            nfree += 1;
        } else {
            iact -= 1;
            index[iact] = i;
        }
    }
    (nfree, nenter, ileave)
}

/// Compute the reduced gradient `r = −Z'B(xcp − x) − Z'g` for the
/// free subspace (Fortran `cmprlb`, `lbfgsb.f:1720`). Writes
/// `r[0..nfree]`; consumes the compact-form Cauchy correction stored
/// in `wa_c` (= `W'(xcp − x)` from cauchy).
#[allow(clippy::too_many_arguments)]
fn cmprlb<V>(
    x: &[f64],
    g: &[f64],
    z: &[f64],
    r: &mut [f64],
    wa_c: &mut [f64],
    wa_p: &mut [f64],
    sy: &[f64],
    wt: &[f64],
    ws: &[V],
    wy: &[V],
    index: &[usize],
    nfree: usize,
    cnstnd: bool,
    col: usize,
    theta: f64,
    m: usize,
) -> Result<(), ()>
where
    V: AsFloatSlice,
{
    if !cnstnd && col > 0 {
        for i in 0..x.len() {
            r[i] = -g[i];
        }
        return Ok(());
    }
    for i in 0..nfree {
        let k = index[i];
        r[i] = -theta * (z[k] - x[k]) - g[k];
    }
    // Apply M⁻¹ to `wa_c` → `wa_p`, then add the correction.
    bmv(sy, wt, col, m, wa_c, wa_p).map_err(|_| ())?;
    for j in 0..col {
        let a1 = wa_p[j];
        let a2 = theta * wa_p[col + j];
        let wy_j = wy[j].as_float_slice();
        let ws_j = ws[j].as_float_slice();
        for i in 0..nfree {
            let k = index[i];
            r[i] += wy_j[k] * a1 + ws_j[k] * a2;
        }
    }
    Ok(())
}

/// Reset the limited-memory history of `state` and bail or continue
/// based on `restart_budget`. Returns `true` if a restart was budgeted
/// and the caller should `continue` the outer loop, `false` if budget
/// was exhausted.
fn try_restart<V>(state: &mut LbfgsState<V>, g_v: &V, f_old: f64, restart_budget: &mut u8) -> bool
where
    V: Clone,
{
    if *restart_budget == 0 {
        // Restore the cached gradient / cost so the state stays
        // consistent for the caller.
        state.gradient = Some(g_v.clone());
        state.cost = Some(f_old);
        return false;
    }
    *restart_budget -= 1;
    // Clear history & reset theta.
    state.ws.clear();
    state.wy.clear();
    for v in state.sy.iter_mut() {
        *v = 0.0;
    }
    for v in state.ss.iter_mut() {
        *v = 0.0;
    }
    state.theta = 1.0;
    if let Some(work) = state.work.as_mut() {
        work.reset_history();
    }
    true
}

/// Same as `try_restart`, but used after the line search has already
/// applied side effects we need to leave intact (state.gradient / cost
/// already restored by the caller).
fn try_restart_after_lnsrch<V>(state: &mut LbfgsState<V>, restart_budget: &mut u8) -> bool {
    if *restart_budget == 0 {
        return false;
    }
    *restart_budget -= 1;
    state.ws.clear();
    state.wy.clear();
    for v in state.sy.iter_mut() {
        *v = 0.0;
    }
    for v in state.ss.iter_mut() {
        *v = 0.0;
    }
    state.theta = 1.0;
    if let Some(work) = state.work.as_mut() {
        work.reset_history();
    }
    true
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::executor::Executor;
    use crate::core::termination::{MaxIter, ProjectedGradientTolerance};

    /// Smoke test on a small box-constrained quadratic. The problem
    /// `f(x) = (x − c)ᵀ(x − c)` with bounds `[l, u]` has minimizer
    /// `clamp(c, l, u)`. Verifies the full pipeline (cauchy + subsm +
    /// formk + line search + matupd) returns the analytical answer.
    #[test]
    fn shifted_quadratic_in_box_converges_to_clamp() {
        use crate::core::constraint::BoxConstrained;
        use crate::core::problem::{CostFunction, Gradient};

        struct Quad {
            c: Vec<f64>,
            l: Vec<f64>,
            u: Vec<f64>,
        }
        impl CostFunction for Quad {
            type Param = Vec<f64>;
            type Output = f64;
            fn cost(&self, x: &Vec<f64>) -> f64 {
                x.iter().zip(&self.c).map(|(a, b)| (a - b).powi(2)).sum()
            }
        }
        impl Gradient for Quad {
            type Param = Vec<f64>;
            type Gradient = Vec<f64>;
            fn gradient(&self, x: &Vec<f64>) -> Vec<f64> {
                x.iter().zip(&self.c).map(|(a, b)| 2.0 * (a - b)).collect()
            }
        }
        impl BoxConstrained for Quad {
            fn lower(&self) -> &Vec<f64> {
                &self.l
            }
            fn upper(&self) -> &Vec<f64> {
                &self.u
            }
        }

        // Unconstrained minimizer at (3, -1); both clipped by bounds.
        let problem = Quad {
            c: vec![3.0, -1.0],
            l: vec![0.0, 0.0],
            u: vec![2.0, 2.0],
        };

        let state = LbfgsState::new(vec![1.0, 1.0], 5);
        let solver = LBFGSB::new();
        let lower = problem.lower().clone();
        let upper = problem.upper().clone();
        let result = Executor::new(problem, solver, state)
            .terminate_on(MaxIter(50))
            .terminate_on(ProjectedGradientTolerance::new(lower, upper, 1e-10))
            .run();
        let final_x = result.state.param.clone();
        // Optimum: clamp((3, -1), [0,0], [2, 2]) = (2, 0).
        assert!((final_x[0] - 2.0).abs() < 1e-6, "x0 = {}", final_x[0]);
        assert!(final_x[1].abs() < 1e-6, "x1 = {}", final_x[1]);
    }

    /// Unbounded `−∞ ≤ x ≤ +∞` problem: behaves as L-BFGS on
    /// Rosenbrock 2D. The compact-form path with `cnstnd == false`
    /// skips GCP entirely after the first iteration.
    #[test]
    fn unbounded_rosenbrock_2d_converges() {
        use crate::core::constraint::BoxConstrained;
        use crate::core::problem::{CostFunction, Gradient};

        struct Rosen {
            l: Vec<f64>,
            u: Vec<f64>,
        }
        impl CostFunction for Rosen {
            type Param = Vec<f64>;
            type Output = f64;
            fn cost(&self, x: &Vec<f64>) -> f64 {
                (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2)
            }
        }
        impl Gradient for Rosen {
            type Param = Vec<f64>;
            type Gradient = Vec<f64>;
            fn gradient(&self, x: &Vec<f64>) -> Vec<f64> {
                let dfdx0 = -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] * x[0]);
                let dfdx1 = 200.0 * (x[1] - x[0] * x[0]);
                vec![dfdx0, dfdx1]
            }
        }
        impl BoxConstrained for Rosen {
            fn lower(&self) -> &Vec<f64> {
                &self.l
            }
            fn upper(&self) -> &Vec<f64> {
                &self.u
            }
        }

        let problem = Rosen {
            l: vec![f64::NEG_INFINITY; 2],
            u: vec![f64::INFINITY; 2],
        };
        let state = LbfgsState::new(vec![-1.2, 1.0], 5);
        let solver = LBFGSB::new();
        let lower = problem.lower().clone();
        let upper = problem.upper().clone();
        let result = Executor::new(problem, solver, state)
            .terminate_on(MaxIter(200))
            .terminate_on(ProjectedGradientTolerance::new(lower, upper, 1e-8))
            .run();
        let final_x = result.state.param.clone();
        assert!(
            (final_x[0] - 1.0).abs() < 1e-3 && (final_x[1] - 1.0).abs() < 1e-3,
            "x = {:?}",
            final_x
        );
    }
}