Skip to main content

pounce_algorithm/
batch.rs

1//! Batched NLP solving (pounce#126) — N independent NLPs on a rayon
2//! pool.
3//!
4//! The NLP analog of `pounce-convex`'s `solve_qp_batch` /
5//! `solve_qp_batch_parallel`: solve a batch of independent problems
6//! (parametric sweeps, multi-start, branch-and-bound node relaxations)
7//! and return one result per input, in input order. Each instance runs
8//! the full filter-IPM end-to-end via its own [`IpoptApplication`].
9//!
10//! # Parallelism model: outer-parallel, inner-serial
11//!
12//! Same rationale as the QP batch (`pounce-convex/src/batch.rs`): each
13//! NLP solve is fully independent, so the batch is embarrassingly
14//! parallel *across instances* — but the default FERAL factorization
15//! backend is itself rayon-parallel *within* a factor, and running
16//! both levels oversubscribes the cores. [`solve_nlp_batch_parallel`]
17//! therefore builds everything per worker, and the `configure` hook
18//! should install an **inner-serial** linear-solver backend;
19//! [`install_serial_feral_backend`] does exactly that (it honors the
20//! app's `feral_*` options, forcing only `parallel = off` — a
21//! per-backend setting, not global state). Unlike the QP batch, NLP
22//! instances converge in wildly different iteration counts; that is
23//! fine for thread-per-instance rayon (a converged instance frees its
24//! worker — no lockstep), and no cross-instance KKT-structure sharing
25//! is attempted because general-NLP sparsity may differ per instance.
26//!
27//! # Construction happens on the worker
28//!
29//! `pounce-nlp`'s solver plumbing is single-threaded
30//! (`Rc<RefCell<…>>`), so nothing solver-side crosses a thread
31//! boundary: each worker receives one owned `T: TNLP + Send` (e.g. a
32//! `pounce_nl::nl_reader::NlTnlp`, which is `Send` since its CSE nodes
33//! went `Arc`), constructs its own [`IpoptApplication`] *inside* the
34//! worker closure, and only the plain-data [`NlpBatchResult`] comes
35//! back.
36//!
37//! # The `configure` hook
38//!
39//! `pounce-algorithm` cannot depend on `pounce-restoration` (the dep
40//! edge runs the other way), so the per-worker application is handed
41//! to a caller-supplied `configure` closure that should, at minimum,
42//! mirror what single-solve drivers do: set options, then install a
43//! linear-backend factory and a restoration-factory provider. Without
44//! a restoration provider an instance that needs the restoration
45//! phase returns `RestorationFailure` instead of recovering — same as
46//! a bare `IpoptApplication`. See `pounce-cli`'s solve setup or
47//! `pounce-py`'s `Problem::prepare` for the full recipe.
48
49use crate::application::IpoptApplication;
50use pounce_common::types::{Index, Number};
51use pounce_linsol::{EMatrixFormat, ESymSolverStatus, SparseSymLinearSolverInterface};
52use pounce_nlp::alg_types::SolverReturn;
53use pounce_nlp::return_codes::ApplicationReturnStatus;
54use pounce_nlp::solve_statistics::SolveStatistics;
55use pounce_nlp::tnlp::{
56    BoundsInfo, IpoptCq, IpoptData, IterStats, Linearity, MetaData, NlpInfo, ScalingRequest,
57    Solution, SparsityRequest, StartingPoint, TNLP,
58};
59use rayon::prelude::*;
60use std::cell::RefCell;
61use std::collections::HashMap;
62use std::rc::Rc;
63use std::sync::{Arc, Mutex};
64use std::thread::{self, ThreadId};
65
66/// Final iterate of one batch instance, captured from the
67/// `finalize_solution` callback (owned copies of the borrowed
68/// buffers).
69#[derive(Debug, Clone)]
70pub struct NlpBatchSolution {
71    /// Algorithm-level termination status (finer-grained than the
72    /// application-level [`NlpBatchResult::status`]).
73    pub solver_status: SolverReturn,
74    pub x: Vec<Number>,
75    /// Lower / upper bound multipliers.
76    pub z_l: Vec<Number>,
77    pub z_u: Vec<Number>,
78    /// Constraint values `g(x)` at the final iterate.
79    pub g: Vec<Number>,
80    /// Constraint multipliers.
81    pub lambda: Vec<Number>,
82    pub obj: Number,
83}
84
85/// Outcome of one instance of a batched solve.
86#[derive(Debug, Clone)]
87pub struct NlpBatchResult {
88    pub status: ApplicationReturnStatus,
89    /// `None` when the solve aborted before `finalize_solution` ran
90    /// (e.g. `InvalidProblemDefinition`).
91    pub solution: Option<NlpBatchSolution>,
92    /// Per-instance solve statistics (iteration count, final KKT
93    /// error, timings, …).
94    pub stats: SolveStatistics,
95}
96
97/// Delegating wrapper that records the `finalize_solution` payload so
98/// the batch driver can return it as owned data. Every other `TNLP`
99/// method forwards to the wrapped instance unchanged.
100struct CaptureTnlp<T: TNLP> {
101    inner: T,
102    captured: Option<NlpBatchSolution>,
103}
104
105impl<T: TNLP> TNLP for CaptureTnlp<T> {
106    fn get_nlp_info(&mut self) -> Option<NlpInfo> {
107        self.inner.get_nlp_info()
108    }
109    fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool {
110        self.inner.get_bounds_info(b)
111    }
112    fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool {
113        self.inner.get_starting_point(sp)
114    }
115    fn eval_f(&mut self, x: &[Number], new_x: bool) -> Option<Number> {
116        self.inner.eval_f(x, new_x)
117    }
118    fn eval_grad_f(&mut self, x: &[Number], new_x: bool, grad_f: &mut [Number]) -> bool {
119        self.inner.eval_grad_f(x, new_x, grad_f)
120    }
121    fn eval_g(&mut self, x: &[Number], new_x: bool, g: &mut [Number]) -> bool {
122        self.inner.eval_g(x, new_x, g)
123    }
124    fn eval_jac_g(&mut self, x: Option<&[Number]>, new_x: bool, mode: SparsityRequest<'_>) -> bool {
125        self.inner.eval_jac_g(x, new_x, mode)
126    }
127    fn eval_h(
128        &mut self,
129        x: Option<&[Number]>,
130        new_x: bool,
131        obj_factor: Number,
132        lambda: Option<&[Number]>,
133        new_lambda: bool,
134        mode: SparsityRequest<'_>,
135    ) -> bool {
136        self.inner
137            .eval_h(x, new_x, obj_factor, lambda, new_lambda, mode)
138    }
139    fn finalize_solution(&mut self, sol: Solution<'_>, ip_data: &IpoptData, ip_cq: &IpoptCq) {
140        self.captured = Some(NlpBatchSolution {
141            solver_status: sol.status,
142            x: sol.x.to_vec(),
143            z_l: sol.z_l.to_vec(),
144            z_u: sol.z_u.to_vec(),
145            g: sol.g.to_vec(),
146            lambda: sol.lambda.to_vec(),
147            obj: sol.obj_value,
148        });
149        self.inner.finalize_solution(sol, ip_data, ip_cq);
150    }
151    fn get_var_con_metadata(&mut self, var: &mut MetaData, con: &mut MetaData) -> bool {
152        self.inner.get_var_con_metadata(var, con)
153    }
154    fn get_scaling_parameters(&mut self, req: ScalingRequest<'_>) -> bool {
155        self.inner.get_scaling_parameters(req)
156    }
157    fn get_variables_linearity(&mut self, types: &mut [Linearity]) -> bool {
158        self.inner.get_variables_linearity(types)
159    }
160    fn get_objective_variables_linearity(&mut self, types: &mut [Linearity]) -> bool {
161        self.inner.get_objective_variables_linearity(types)
162    }
163    fn get_constraints_linearity(&mut self, types: &mut [Linearity]) -> bool {
164        self.inner.get_constraints_linearity(types)
165    }
166    fn get_number_of_nonlinear_variables(&mut self) -> pounce_common::types::Index {
167        self.inner.get_number_of_nonlinear_variables()
168    }
169    fn get_list_of_nonlinear_variables(
170        &mut self,
171        pos_nonlin_vars: &mut [pounce_common::types::Index],
172    ) -> bool {
173        self.inner.get_list_of_nonlinear_variables(pos_nonlin_vars)
174    }
175    fn intermediate_callback(
176        &mut self,
177        stats: IterStats,
178        ip_data: &IpoptData,
179        ip_cq: &IpoptCq,
180    ) -> bool {
181        self.inner.intermediate_callback(stats, ip_data, ip_cq)
182    }
183    fn finalize_metadata(&mut self, var: &MetaData, con: &MetaData) {
184        self.inner.finalize_metadata(var, con)
185    }
186}
187
188/// Install an inner-serial FERAL backend on `app`, honoring any
189/// `feral_*` options already set (read the options *before* calling
190/// this if `configure` sets them — i.e. set options first, then call
191/// this). The `parallel = off` toggle is per-backend; concurrent
192/// solves on other threads are unaffected.
193pub fn install_serial_feral_backend(app: &mut IpoptApplication) {
194    let mut cfg = crate::application::feral_config_from_options(app.options());
195    cfg.parallel = Some(false);
196    app.set_linear_backend_factory(Box::new(move |_choice| {
197        Box::new(pounce_feral::FeralSolverInterface::with_config(cfg.clone()))
198    }));
199}
200
201/// Per-thread pool of serial FERAL backends for **identical-sparsity**
202/// batches (the issue-#126 "optional later optimization": reuse the
203/// symbolic analysis across instances).
204///
205/// FERAL's `Solver` keys its symbolic factorization (fill-reducing
206/// ordering + supernode structure) on a pattern fingerprint and reuses
207/// it across `factor()` calls — that is what already amortizes the
208/// symbolic cost over a single instance's IPM iterations. A fresh
209/// backend per instance throws that cache away between instances; this
210/// pool instead parks each worker thread's backend when its solve
211/// finishes and hands it to the next instance scheduled on that
212/// thread. When the next instance's KKT pattern is identical, its
213/// first factorization hits the cached symbolic; when it differs, the
214/// fingerprint mismatch triggers a fresh analysis — so pooling is
215/// *always correct*, just only *profitable* for shared-structure
216/// sweeps.
217///
218/// **Determinism caveat (why this is opt-in):** the pooled solver
219/// carries value-adjacent state across instances — most notably the
220/// MC64 scaling cache (reused only inside its validity bound) and any
221/// pivot-quality escalation a previous instance triggered. Solutions
222/// still satisfy the same tolerances, but are not guaranteed
223/// bit-identical to fresh-backend solves. The default batch entry
224/// points keep one fresh backend per instance.
225///
226/// Construct once per batch ([`Self::serial`]), share the `Arc` with
227/// each worker's `configure` via
228/// [`install_pooled_serial_feral_backend`]. Restoration-phase inner
229/// solves keep their own fresh backends. Pooled solvers (and their
230/// cached factors) free when the `Arc` drops.
231pub struct FeralBackendPool {
232    cfg: pounce_feral::FeralConfig,
233    slots: Mutex<HashMap<ThreadId, pounce_feral::FeralSolverInterface>>,
234}
235
236impl FeralBackendPool {
237    /// A pool minting inner-serial backends with `cfg` (its `parallel`
238    /// field is forced off — pooling exists for the outer-parallel /
239    /// inner-serial batch shape).
240    pub fn serial(mut cfg: pounce_feral::FeralConfig) -> Arc<Self> {
241        cfg.parallel = Some(false);
242        Arc::new(Self {
243            cfg,
244            slots: Mutex::new(HashMap::new()),
245        })
246    }
247
248    /// Take this thread's parked backend (or mint a fresh one). The
249    /// returned guard parks it back on drop, on the same thread.
250    fn acquire(self: &Arc<Self>) -> PooledFeralBackend {
251        let recycled = self
252            .slots
253            .lock()
254            .ok()
255            .and_then(|mut s| s.remove(&thread::current().id()));
256        let inner = recycled
257            .unwrap_or_else(|| pounce_feral::FeralSolverInterface::with_config(self.cfg.clone()));
258        PooledFeralBackend {
259            inner: Some(inner),
260            pool: Arc::clone(self),
261        }
262    }
263}
264
265impl std::fmt::Debug for FeralBackendPool {
266    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
267        let parked = self.slots.lock().map(|s| s.len()).unwrap_or(0);
268        f.debug_struct("FeralBackendPool")
269            .field("parked", &parked)
270            .finish_non_exhaustive()
271    }
272}
273
274/// RAII guard around a pooled [`pounce_feral::FeralSolverInterface`]:
275/// delegates the backend trait verbatim and parks the solver back into
276/// its pool slot when the solve's ownership chain drops it. If the
277/// slot is already occupied (two live backends on one thread — e.g. a
278/// nested factory call), the extra solver is simply dropped: pooling
279/// degrades to fresh-per-instance, never to shared-mutable state.
280struct PooledFeralBackend {
281    inner: Option<pounce_feral::FeralSolverInterface>,
282    pool: Arc<FeralBackendPool>,
283}
284
285impl PooledFeralBackend {
286    fn get(&mut self) -> &mut pounce_feral::FeralSolverInterface {
287        // Invariant: `inner` is `Some` from construction until drop.
288        #[allow(clippy::expect_used)]
289        self.inner.as_mut().expect("pooled backend already taken")
290    }
291}
292
293impl Drop for PooledFeralBackend {
294    fn drop(&mut self) {
295        if let (Some(solver), Ok(mut slots)) = (self.inner.take(), self.pool.slots.lock()) {
296            slots.entry(thread::current().id()).or_insert(solver);
297        }
298    }
299}
300
301impl SparseSymLinearSolverInterface for PooledFeralBackend {
302    fn initialize_structure(
303        &mut self,
304        dim: Index,
305        nonzeros: Index,
306        ia: &[Index],
307        ja: &[Index],
308    ) -> ESymSolverStatus {
309        self.get().initialize_structure(dim, nonzeros, ia, ja)
310    }
311    fn values_array_mut(&mut self) -> &mut [Number] {
312        self.get().values_array_mut()
313    }
314    fn multi_solve(
315        &mut self,
316        new_matrix: bool,
317        ia: &[Index],
318        ja: &[Index],
319        nrhs: Index,
320        rhs_vals: &mut [Number],
321        check_neg_evals: bool,
322        number_of_neg_evals: Index,
323    ) -> ESymSolverStatus {
324        self.get().multi_solve(
325            new_matrix,
326            ia,
327            ja,
328            nrhs,
329            rhs_vals,
330            check_neg_evals,
331            number_of_neg_evals,
332        )
333    }
334    fn number_of_neg_evals(&self) -> Index {
335        match &self.inner {
336            Some(s) => s.number_of_neg_evals(),
337            None => 0,
338        }
339    }
340    fn increase_quality(&mut self) -> bool {
341        self.get().increase_quality()
342    }
343    fn provides_inertia(&self) -> bool {
344        self.inner.as_ref().is_some_and(|s| s.provides_inertia())
345    }
346    fn matrix_format(&self) -> EMatrixFormat {
347        match &self.inner {
348            Some(s) => s.matrix_format(),
349            None => EMatrixFormat::TripletFormat,
350        }
351    }
352    fn provides_degeneracy_detection(&self) -> bool {
353        self.inner
354            .as_ref()
355            .is_some_and(|s| s.provides_degeneracy_detection())
356    }
357    fn determine_dependent_rows(
358        &mut self,
359        n_rows: Index,
360        n_cols: Index,
361        irn: &[Index],
362        jcn: &[Index],
363        vals: &[Number],
364        c_deps: &mut Vec<Index>,
365    ) -> ESymSolverStatus {
366        self.get()
367            .determine_dependent_rows(n_rows, n_cols, irn, jcn, vals, c_deps)
368    }
369    fn factor_pattern(&self, want_values: bool) -> Option<pounce_linsol::FactorPattern> {
370        self.inner
371            .as_ref()
372            .and_then(|s| s.factor_pattern(want_values))
373    }
374}
375
376/// Install a backend factory drawing from `pool` — the
377/// identical-sparsity batch optimization (see [`FeralBackendPool`] for
378/// the reuse semantics and the determinism caveat). Call from
379/// `configure` *instead of* [`install_serial_feral_backend`].
380pub fn install_pooled_serial_feral_backend(
381    app: &mut IpoptApplication,
382    pool: &Arc<FeralBackendPool>,
383) {
384    let pool = Arc::clone(pool);
385    app.set_linear_backend_factory(Box::new(move |_choice| Box::new(pool.acquire())));
386}
387
388/// Per-instance warm-start iterate for [`solve_nlp_batch_parallel_warm`]:
389/// primal point plus the three dual vectors the IPM's warm-start
390/// initializer consumes via `TNLP::get_starting_point`. Build one from
391/// a previous batch's [`NlpBatchSolution`] (the `From` impl) for MPC /
392/// sequential-chaining workloads.
393#[derive(Debug, Clone, Default)]
394pub struct NlpWarmStart {
395    pub x: Vec<Number>,
396    /// Constraint multipliers λ.
397    pub lambda: Vec<Number>,
398    /// Lower / upper bound multipliers.
399    pub z_l: Vec<Number>,
400    pub z_u: Vec<Number>,
401    /// Barrier parameter μ to resume from — typically the previous
402    /// solve's final μ ([`SolveStatistics::final_mu`]). When `Some`,
403    /// the warm batch sets `mu_init` to it (floored at `1e-9`, and
404    /// only if the caller's `configure` didn't set `mu_init`
405    /// explicitly), so the IPM continues near the converged barrier
406    /// instead of recentering at the cold default `0.1` — without
407    /// this, a warm start from a near-optimal point can *gain*
408    /// iterations walking back out to the central path.
409    pub mu: Option<Number>,
410}
411
412impl From<&NlpBatchSolution> for NlpWarmStart {
413    fn from(sol: &NlpBatchSolution) -> Self {
414        Self {
415            x: sol.x.clone(),
416            lambda: sol.lambda.clone(),
417            z_l: sol.z_l.clone(),
418            z_u: sol.z_u.clone(),
419            mu: None,
420        }
421    }
422}
423
424/// Build a warm start from a full batch result: iterate + duals from
425/// the captured solution, μ from the statistics. An instance whose
426/// solve produced no solution yields an empty warm start, which the
427/// next solve treats as a cold start (dimension-mismatch fallback).
428impl From<&NlpBatchResult> for NlpWarmStart {
429    fn from(r: &NlpBatchResult) -> Self {
430        match &r.solution {
431            Some(sol) => Self {
432                mu: Some(r.stats.final_mu),
433                ..Self::from(sol)
434            },
435            None => Self::default(),
436        }
437    }
438}
439
440/// Floor for warm-start `mu_init` threading (see [`NlpWarmStart::mu`]).
441const WARM_MU_FLOOR: Number = 1e-9;
442
443/// Apply the warm-start options to a configured per-worker app:
444/// always force `warm_start_init_point=yes` (the point of the warm
445/// entry), and thread the instance's resumed μ into `mu_init` unless
446/// the caller's `configure` already set one explicitly.
447fn apply_warm_options(app: &mut IpoptApplication, mu: Option<Number>) {
448    let _ = app
449        .options_mut()
450        .set_string_value("warm_start_init_point", "yes", true, false);
451    if let Some(mu) = mu {
452        let user_set_mu = matches!(
453            app.options().get_numeric_value("mu_init", ""),
454            Ok((_, true))
455        );
456        if !user_set_mu {
457            let _ =
458                app.options_mut()
459                    .set_numeric_value("mu_init", mu.max(WARM_MU_FLOOR), true, false);
460        }
461    }
462}
463
464/// Delegating wrapper that serves a [`NlpWarmStart`] from
465/// `get_starting_point` (primal always; duals when the initializer
466/// asks, i.e. under `warm_start_init_point=yes`). Any length mismatch
467/// against the wrapped problem falls back to the inner TNLP's own
468/// starting point — a per-instance cold start, mirroring the QP batch's
469/// warm-start contract. Every other method forwards unchanged.
470struct WarmStartTnlp<T: TNLP> {
471    inner: T,
472    warm: NlpWarmStart,
473}
474
475impl<T: TNLP> TNLP for WarmStartTnlp<T> {
476    fn get_nlp_info(&mut self) -> Option<NlpInfo> {
477        self.inner.get_nlp_info()
478    }
479    fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool {
480        self.inner.get_bounds_info(b)
481    }
482    fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool {
483        let dims_ok = self.warm.x.len() == sp.x.len()
484            && (!sp.init_lambda || self.warm.lambda.len() == sp.lambda.len())
485            && (!sp.init_z
486                || (self.warm.z_l.len() == sp.z_l.len() && self.warm.z_u.len() == sp.z_u.len()));
487        if !dims_ok {
488            return self.inner.get_starting_point(sp);
489        }
490        if sp.init_x {
491            sp.x.copy_from_slice(&self.warm.x);
492        }
493        if sp.init_z {
494            sp.z_l.copy_from_slice(&self.warm.z_l);
495            sp.z_u.copy_from_slice(&self.warm.z_u);
496        }
497        if sp.init_lambda {
498            sp.lambda.copy_from_slice(&self.warm.lambda);
499        }
500        true
501    }
502    fn eval_f(&mut self, x: &[Number], new_x: bool) -> Option<Number> {
503        self.inner.eval_f(x, new_x)
504    }
505    fn eval_grad_f(&mut self, x: &[Number], new_x: bool, grad_f: &mut [Number]) -> bool {
506        self.inner.eval_grad_f(x, new_x, grad_f)
507    }
508    fn eval_g(&mut self, x: &[Number], new_x: bool, g: &mut [Number]) -> bool {
509        self.inner.eval_g(x, new_x, g)
510    }
511    fn eval_jac_g(&mut self, x: Option<&[Number]>, new_x: bool, mode: SparsityRequest<'_>) -> bool {
512        self.inner.eval_jac_g(x, new_x, mode)
513    }
514    fn eval_h(
515        &mut self,
516        x: Option<&[Number]>,
517        new_x: bool,
518        obj_factor: Number,
519        lambda: Option<&[Number]>,
520        new_lambda: bool,
521        mode: SparsityRequest<'_>,
522    ) -> bool {
523        self.inner
524            .eval_h(x, new_x, obj_factor, lambda, new_lambda, mode)
525    }
526    fn finalize_solution(&mut self, sol: Solution<'_>, ip_data: &IpoptData, ip_cq: &IpoptCq) {
527        self.inner.finalize_solution(sol, ip_data, ip_cq)
528    }
529    fn get_var_con_metadata(&mut self, var: &mut MetaData, con: &mut MetaData) -> bool {
530        self.inner.get_var_con_metadata(var, con)
531    }
532    fn get_scaling_parameters(&mut self, req: ScalingRequest<'_>) -> bool {
533        self.inner.get_scaling_parameters(req)
534    }
535    fn get_variables_linearity(&mut self, types: &mut [Linearity]) -> bool {
536        self.inner.get_variables_linearity(types)
537    }
538    fn get_objective_variables_linearity(&mut self, types: &mut [Linearity]) -> bool {
539        self.inner.get_objective_variables_linearity(types)
540    }
541    fn get_constraints_linearity(&mut self, types: &mut [Linearity]) -> bool {
542        self.inner.get_constraints_linearity(types)
543    }
544    fn get_number_of_nonlinear_variables(&mut self) -> pounce_common::types::Index {
545        self.inner.get_number_of_nonlinear_variables()
546    }
547    fn get_list_of_nonlinear_variables(
548        &mut self,
549        pos_nonlin_vars: &mut [pounce_common::types::Index],
550    ) -> bool {
551        self.inner.get_list_of_nonlinear_variables(pos_nonlin_vars)
552    }
553    fn intermediate_callback(
554        &mut self,
555        stats: IterStats,
556        ip_data: &IpoptData,
557        ip_cq: &IpoptCq,
558    ) -> bool {
559        self.inner.intermediate_callback(stats, ip_data, ip_cq)
560    }
561    fn finalize_metadata(&mut self, var: &MetaData, con: &MetaData) {
562        self.inner.finalize_metadata(var, con)
563    }
564}
565
566/// Solve one instance with a fresh, caller-configured application.
567///
568/// Panic-isolated: a Rust panic raised *during* the solve — a user
569/// `eval_*`/`intermediate_callback` that panics, or a backend assertion
570/// tripping on malformed structure — is caught here and reported as an
571/// [`ApplicationReturnStatus::InternalError`] row. Without this guard the
572/// panic would unwind out of the rayon `map`/`collect` (or the sequential
573/// `map`) and discard *every other instance's* result, so a single bad
574/// instance would poison the whole batch. (Solver-detected failures —
575/// infeasibility, invalid numbers, eval callbacks that *return* failure —
576/// are already reported as ordinary statuses and never reach this path.)
577fn solve_nlp_one<T, C>(index: usize, tnlp: T, configure: &mut C) -> NlpBatchResult
578where
579    T: TNLP + 'static,
580    C: FnMut(usize, &mut IpoptApplication),
581{
582    let mut app = IpoptApplication::new();
583    configure(index, &mut app);
584    let cap = Rc::new(RefCell::new(CaptureTnlp {
585        inner: tnlp,
586        captured: None,
587    }));
588    // `AssertUnwindSafe`: on a caught panic we discard `app`/`cap` without
589    // observing them, so any broken interior-mutability invariant cannot
590    // leak out — only the freshly-built `InternalError` row is returned.
591    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
592        let status = app.optimize_tnlp(Rc::clone(&cap) as Rc<RefCell<dyn TNLP>>);
593        let stats = app.statistics();
594        let solution = cap.borrow_mut().captured.take();
595        NlpBatchResult {
596            status,
597            solution,
598            stats,
599        }
600    }));
601    outcome.unwrap_or_else(|_| NlpBatchResult {
602        status: ApplicationReturnStatus::InternalError,
603        solution: None,
604        stats: SolveStatistics::default(),
605    })
606}
607
608/// Solve a batch of independent NLPs **sequentially**, returning one
609/// result per input in input order. `configure` is called once per
610/// instance (receiving the instance index, so per-instance options are
611/// possible) on a fresh [`IpoptApplication`] (set options / backend /
612/// restoration there — see the module docs). Predictable and
613/// contention-free; the right choice when each instance is large
614/// enough that the linear-solver backend parallelizes internally.
615pub fn solve_nlp_batch<T, C>(problems: Vec<T>, mut configure: C) -> Vec<NlpBatchResult>
616where
617    T: TNLP + 'static,
618    C: FnMut(usize, &mut IpoptApplication),
619{
620    problems
621        .into_iter()
622        .enumerate()
623        .map(|(i, t)| solve_nlp_one(i, t, &mut configure))
624        .collect()
625}
626
627/// Solve a batch of independent NLPs **in parallel across instances**
628/// on rayon's global pool, returning one result per input in input
629/// order regardless of completion order. Best for many small / medium
630/// instances where cross-instance throughput beats parallelizing each
631/// factor internally.
632///
633/// Each worker owns its instance end-to-end: the `T: TNLP + Send`
634/// moves in, and the application, backend, and restoration strategy
635/// are all constructed *inside* the worker via `configure` (which must
636/// therefore be `Sync` — it is shared by reference and called once per
637/// instance, with the instance index so per-instance options are
638/// possible). For the outer-parallel / inner-serial win, have
639/// `configure` install an inner-serial backend —
640/// [`install_serial_feral_backend`] after setting options.
641pub fn solve_nlp_batch_parallel<T, C>(problems: Vec<T>, configure: C) -> Vec<NlpBatchResult>
642where
643    T: TNLP + Send + 'static,
644    C: Fn(usize, &mut IpoptApplication) + Sync,
645{
646    problems
647        .into_par_iter()
648        .enumerate()
649        .map(|(i, t)| {
650            solve_nlp_one(i, t, &mut |i: usize, app: &mut IpoptApplication| {
651                configure(i, app)
652            })
653        })
654        .collect()
655}
656
657/// Warm-started **sequential** batch — [`solve_nlp_batch`] seeded per
658/// instance from `warms`. See [`solve_nlp_batch_parallel_warm`] for
659/// the warm-start contract (option forcing, dimension-mismatch
660/// fallback).
661///
662/// # Panics
663/// Panics if `warms.len() != problems.len()`.
664pub fn solve_nlp_batch_warm<T, C>(
665    problems: Vec<T>,
666    warms: Vec<NlpWarmStart>,
667    mut configure: C,
668) -> Vec<NlpBatchResult>
669where
670    T: TNLP + 'static,
671    C: FnMut(usize, &mut IpoptApplication),
672{
673    assert_eq!(
674        warms.len(),
675        problems.len(),
676        "warms.len() ({}) must equal problems.len() ({})",
677        warms.len(),
678        problems.len()
679    );
680    let mus: Vec<Option<Number>> = warms.iter().map(|w| w.mu).collect();
681    let wrapped: Vec<WarmStartTnlp<T>> = problems
682        .into_iter()
683        .zip(warms)
684        .map(|(inner, warm)| WarmStartTnlp { inner, warm })
685        .collect();
686    solve_nlp_batch(wrapped, |i, app: &mut IpoptApplication| {
687        configure(i, app);
688        apply_warm_options(app, mus[i]);
689    })
690}
691
692/// Warm-started parallel batch: like [`solve_nlp_batch_parallel`] but
693/// each instance is seeded from the corresponding entry of `warms` —
694/// typically the previous step's [`NlpBatchSolution`]s for a sequence
695/// of nearby batches (receding-horizon MPC, sequential parameter
696/// continuation, B&B dives). The NLP analog of the QP batch's
697/// `solve_qp_batch_parallel_warm`.
698///
699/// Each worker forces `warm_start_init_point=yes` *after* `configure`
700/// runs (that option is the point of this entry; pair it with
701/// `mu_init` / `warm_start_target_mu` via `configure` for the full
702/// re-optimization effect), then serves the warm iterate through
703/// `get_starting_point`. A warm start only affects an instance's
704/// iteration count, not its solution; a per-instance dimension
705/// mismatch falls back to that instance's own (cold) starting point.
706///
707/// # Panics
708/// Panics if `warms.len() != problems.len()`.
709pub fn solve_nlp_batch_parallel_warm<T, C>(
710    problems: Vec<T>,
711    warms: Vec<NlpWarmStart>,
712    configure: C,
713) -> Vec<NlpBatchResult>
714where
715    T: TNLP + Send + 'static,
716    C: Fn(usize, &mut IpoptApplication) + Sync,
717{
718    assert_eq!(
719        warms.len(),
720        problems.len(),
721        "warms.len() ({}) must equal problems.len() ({})",
722        warms.len(),
723        problems.len()
724    );
725    let mus: Vec<Option<Number>> = warms.iter().map(|w| w.mu).collect();
726    let wrapped: Vec<WarmStartTnlp<T>> = problems
727        .into_iter()
728        .zip(warms)
729        .map(|(inner, warm)| WarmStartTnlp { inner, warm })
730        .collect();
731    solve_nlp_batch_parallel(wrapped, |i, app: &mut IpoptApplication| {
732        configure(i, app);
733        apply_warm_options(app, mus[i]);
734    })
735}
736
737#[cfg(test)]
738mod tests {
739    use super::*;
740    use pounce_nlp::tnlp::IndexStyle;
741
742    /// `min (x0 - a)^2 + (x1 - b)^2  s.t. x0 + x1 = s` — a tiny
743    /// per-instance-parameterized equality-constrained QP with an
744    /// analytic solution: `x = (a, b) + ((s - a - b)/2) * (1, 1)`.
745    struct ShiftedQuad {
746        a: f64,
747        b: f64,
748        s: f64,
749        /// Optional variable bounds (default free).
750        x_l: [f64; 2],
751        x_u: [f64; 2],
752    }
753
754    impl ShiftedQuad {
755        fn new(a: f64, b: f64, s: f64) -> Self {
756            Self {
757                a,
758                b,
759                s,
760                x_l: [-1e19; 2],
761                x_u: [1e19; 2],
762            }
763        }
764        fn expected(&self) -> [f64; 2] {
765            let t = (self.s - self.a - self.b) / 2.0;
766            [self.a + t, self.b + t]
767        }
768    }
769
770    impl TNLP for ShiftedQuad {
771        fn get_nlp_info(&mut self) -> Option<NlpInfo> {
772            Some(NlpInfo {
773                n: 2,
774                m: 1,
775                nnz_jac_g: 2,
776                nnz_h_lag: 2,
777                index_style: IndexStyle::C,
778            })
779        }
780        fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool {
781            b.x_l.copy_from_slice(&self.x_l);
782            b.x_u.copy_from_slice(&self.x_u);
783            b.g_l[0] = self.s;
784            b.g_u[0] = self.s;
785            true
786        }
787        fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool {
788            sp.x[0] = 0.0;
789            sp.x[1] = 0.0;
790            true
791        }
792        fn eval_f(&mut self, x: &[Number], _new_x: bool) -> Option<Number> {
793            Some((x[0] - self.a).powi(2) + (x[1] - self.b).powi(2))
794        }
795        fn eval_grad_f(&mut self, x: &[Number], _new_x: bool, grad_f: &mut [Number]) -> bool {
796            grad_f[0] = 2.0 * (x[0] - self.a);
797            grad_f[1] = 2.0 * (x[1] - self.b);
798            true
799        }
800        fn eval_g(&mut self, x: &[Number], _new_x: bool, g: &mut [Number]) -> bool {
801            g[0] = x[0] + x[1];
802            true
803        }
804        fn eval_jac_g(
805            &mut self,
806            _x: Option<&[Number]>,
807            _new_x: bool,
808            mode: SparsityRequest<'_>,
809        ) -> bool {
810            match mode {
811                SparsityRequest::Structure { irow, jcol } => {
812                    irow.copy_from_slice(&[0, 0]);
813                    jcol.copy_from_slice(&[0, 1]);
814                }
815                SparsityRequest::Values { values } => {
816                    values.copy_from_slice(&[1.0, 1.0]);
817                }
818            }
819            true
820        }
821        fn eval_h(
822            &mut self,
823            _x: Option<&[Number]>,
824            _new_x: bool,
825            obj_factor: Number,
826            _lambda: Option<&[Number]>,
827            _new_lambda: bool,
828            mode: SparsityRequest<'_>,
829        ) -> bool {
830            match mode {
831                SparsityRequest::Structure { irow, jcol } => {
832                    irow.copy_from_slice(&[0, 1]);
833                    jcol.copy_from_slice(&[0, 1]);
834                }
835                SparsityRequest::Values { values } => {
836                    values.copy_from_slice(&[2.0 * obj_factor, 2.0 * obj_factor]);
837                }
838            }
839            true
840        }
841        fn finalize_solution(&mut self, _sol: Solution<'_>, _d: &IpoptData, _q: &IpoptCq) {}
842    }
843
844    fn configure(_i: usize, app: &mut IpoptApplication) {
845        let _ = app
846            .options_mut()
847            .set_integer_value("print_level", 0, true, false);
848        install_serial_feral_backend(app);
849    }
850
851    fn batch(k: usize) -> Vec<ShiftedQuad> {
852        (0..k)
853            .map(|i| ShiftedQuad::new(1.0 + i as f64, 2.0, 1.0 + (i % 3) as f64))
854            .collect()
855    }
856
857    /// A `ShiftedQuad` that panics inside `eval_f` when `boom` — stands in
858    /// for any Rust panic raised mid-solve (a buggy user callback, a backend
859    /// assertion). Used to prove panic isolation: the panicking instance must
860    /// fail only itself, not unwind the whole batch.
861    struct BoomQuad {
862        inner: ShiftedQuad,
863        boom: bool,
864    }
865
866    impl TNLP for BoomQuad {
867        fn get_nlp_info(&mut self) -> Option<NlpInfo> {
868            self.inner.get_nlp_info()
869        }
870        fn get_bounds_info(&mut self, b: BoundsInfo<'_>) -> bool {
871            self.inner.get_bounds_info(b)
872        }
873        fn get_starting_point(&mut self, sp: StartingPoint<'_>) -> bool {
874            self.inner.get_starting_point(sp)
875        }
876        fn eval_f(&mut self, x: &[Number], new_x: bool) -> Option<Number> {
877            if self.boom {
878                panic!("boom: simulated mid-solve panic in eval_f");
879            }
880            self.inner.eval_f(x, new_x)
881        }
882        fn eval_grad_f(&mut self, x: &[Number], new_x: bool, grad_f: &mut [Number]) -> bool {
883            self.inner.eval_grad_f(x, new_x, grad_f)
884        }
885        fn eval_g(&mut self, x: &[Number], new_x: bool, g: &mut [Number]) -> bool {
886            self.inner.eval_g(x, new_x, g)
887        }
888        fn eval_jac_g(
889            &mut self,
890            x: Option<&[Number]>,
891            new_x: bool,
892            mode: SparsityRequest<'_>,
893        ) -> bool {
894            self.inner.eval_jac_g(x, new_x, mode)
895        }
896        fn eval_h(
897            &mut self,
898            x: Option<&[Number]>,
899            new_x: bool,
900            obj_factor: Number,
901            lambda: Option<&[Number]>,
902            new_lambda: bool,
903            mode: SparsityRequest<'_>,
904        ) -> bool {
905            self.inner
906                .eval_h(x, new_x, obj_factor, lambda, new_lambda, mode)
907        }
908        fn finalize_solution(&mut self, sol: Solution<'_>, d: &IpoptData, q: &IpoptCq) {
909            self.inner.finalize_solution(sol, d, q)
910        }
911    }
912
913    #[test]
914    fn empty_batch_returns_empty() {
915        let out = solve_nlp_batch_parallel(Vec::<ShiftedQuad>::new(), configure);
916        assert!(out.is_empty());
917    }
918
919    #[test]
920    fn single_element_batch_solves() {
921        let probs = batch(1);
922        let expected = probs[0].expected();
923        let out = solve_nlp_batch_parallel(probs, configure);
924        assert_eq!(out.len(), 1);
925        assert_eq!(out[0].status, ApplicationReturnStatus::SolveSucceeded);
926        let sol = out[0].solution.as_ref().expect("solution captured");
927        assert!((sol.x[0] - expected[0]).abs() < 1e-6);
928        assert!((sol.x[1] - expected[1]).abs() < 1e-6);
929    }
930
931    #[test]
932    fn parallel_results_in_input_order_and_match_sequential() {
933        let k = 8;
934        let expected: Vec<[f64; 2]> = batch(k).iter().map(|p| p.expected()).collect();
935        let par = solve_nlp_batch_parallel(batch(k), configure);
936        let seq = solve_nlp_batch(batch(k), configure);
937        assert_eq!(par.len(), k);
938        for i in 0..k {
939            assert_eq!(
940                par[i].status,
941                ApplicationReturnStatus::SolveSucceeded,
942                "instance {i}"
943            );
944            let ps = par[i].solution.as_ref().expect("parallel solution");
945            let ss = seq[i].solution.as_ref().expect("sequential solution");
946            // Input order: each instance's analytic optimum.
947            assert!(
948                (ps.x[0] - expected[i][0]).abs() < 1e-6 && (ps.x[1] - expected[i][1]).abs() < 1e-6,
949                "instance {i}: got {:?}, expected {:?}",
950                ps.x,
951                expected[i]
952            );
953            // Parallel and sequential agree (same algorithm, same
954            // serial backend — bit-for-bit).
955            assert_eq!(ps.x, ss.x, "instance {i}");
956            assert_eq!(
957                par[i].stats.iteration_count, seq[i].stats.iteration_count,
958                "instance {i}"
959            );
960        }
961    }
962
963    #[test]
964    fn infeasible_instance_mixed_in_does_not_poison_batch() {
965        // Middle instance has contradictory bounds vs. the equality
966        // row: x0 + x1 = 10 with x <= 1 componentwise.
967        let mut probs = batch(3);
968        probs[1].s = 10.0;
969        probs[1].x_u = [1.0; 2];
970        let out = solve_nlp_batch_parallel(probs, configure);
971        assert_eq!(out.len(), 3);
972        assert_eq!(out[0].status, ApplicationReturnStatus::SolveSucceeded);
973        assert_eq!(out[2].status, ApplicationReturnStatus::SolveSucceeded);
974        assert_ne!(
975            out[1].status,
976            ApplicationReturnStatus::SolveSucceeded,
977            "infeasible instance must not report success"
978        );
979    }
980
981    #[test]
982    fn panicking_instance_does_not_poison_batch() {
983        // Middle instance panics inside `eval_f`; the surrounding good
984        // instances must still solve, the batch must keep input order, and
985        // the panicking row must surface as `InternalError` — not unwind the
986        // whole `collect()`. The default panic hook still prints the message;
987        // that is fine (and useful) — only the unwind is contained.
988        let good = batch(3);
989        let expected: Vec<[f64; 2]> = good.iter().map(|p| p.expected()).collect();
990        let probs: Vec<BoomQuad> = good
991            .into_iter()
992            .enumerate()
993            .map(|(i, inner)| BoomQuad {
994                inner,
995                boom: i == 1,
996            })
997            .collect();
998        let out = solve_nlp_batch_parallel(probs, configure);
999        assert_eq!(out.len(), 3);
1000        assert_eq!(out[1].status, ApplicationReturnStatus::InternalError);
1001        assert!(
1002            out[1].solution.is_none(),
1003            "a panicked instance carries no captured solution"
1004        );
1005        for i in [0, 2] {
1006            assert_eq!(out[i].status, ApplicationReturnStatus::SolveSucceeded);
1007            let sol = out[i].solution.as_ref().expect("solution");
1008            assert!(
1009                (sol.x[0] - expected[i][0]).abs() < 1e-6
1010                    && (sol.x[1] - expected[i][1]).abs() < 1e-6,
1011                "instance {i}: got {:?}, expected {:?}",
1012                sol.x,
1013                expected[i]
1014            );
1015        }
1016        // The sequential path is panic-isolated too (same `solve_nlp_one`).
1017        let probs_seq: Vec<BoomQuad> = batch(3)
1018            .into_iter()
1019            .enumerate()
1020            .map(|(i, inner)| BoomQuad {
1021                inner,
1022                boom: i == 1,
1023            })
1024            .collect();
1025        let seq = solve_nlp_batch(probs_seq, configure);
1026        assert_eq!(seq[1].status, ApplicationReturnStatus::InternalError);
1027        assert_eq!(seq[0].status, ApplicationReturnStatus::SolveSucceeded);
1028        assert_eq!(seq[2].status, ApplicationReturnStatus::SolveSucceeded);
1029    }
1030
1031    /// Warm-start chain (the MPC shape): solve a batch cold, perturb
1032    /// each instance's data slightly, re-solve warm-seeded from the
1033    /// previous solutions. Solutions must be correct, and the warm
1034    /// solves must not iterate more than the cold solves of the same
1035    /// perturbed instances.
1036    #[test]
1037    fn warm_started_batch_chains() {
1038        let k = 4;
1039        let cold = solve_nlp_batch_parallel(batch(k), configure);
1040        let warms: Vec<NlpWarmStart> = cold.iter().map(NlpWarmStart::from).collect();
1041
1042        // Perturb each instance: shift the equality target slightly.
1043        let perturbed = || -> Vec<ShiftedQuad> {
1044            batch(k)
1045                .into_iter()
1046                .map(|mut p| {
1047                    p.s += 0.01;
1048                    p
1049                })
1050                .collect()
1051        };
1052        let warm = solve_nlp_batch_parallel_warm(perturbed(), warms, configure);
1053        let cold2 = solve_nlp_batch_parallel(perturbed(), configure);
1054        for i in 0..k {
1055            assert_eq!(warm[i].status, ApplicationReturnStatus::SolveSucceeded);
1056            let expect = perturbed()[i].expected();
1057            let sol = warm[i].solution.as_ref().expect("warm solution");
1058            assert!(
1059                (sol.x[0] - expect[0]).abs() < 1e-5 && (sol.x[1] - expect[1]).abs() < 1e-5,
1060                "instance {i}: warm solve must reach the perturbed optimum"
1061            );
1062            assert!(
1063                warm[i].stats.iteration_count <= cold2[i].stats.iteration_count,
1064                "instance {i}: warm start took {} iters vs cold {}",
1065                warm[i].stats.iteration_count,
1066                cold2[i].stats.iteration_count
1067            );
1068        }
1069    }
1070
1071    /// A dimension-mismatched warm entry falls back to that instance's
1072    /// own cold start instead of failing.
1073    #[test]
1074    fn warm_start_dimension_mismatch_falls_back_cold() {
1075        let probs = batch(2);
1076        let expected: Vec<[f64; 2]> = probs.iter().map(|p| p.expected()).collect();
1077        let warms = vec![NlpWarmStart::default(), NlpWarmStart::default()];
1078        let out = solve_nlp_batch_parallel_warm(probs, warms, configure);
1079        for (i, r) in out.iter().enumerate() {
1080            assert_eq!(r.status, ApplicationReturnStatus::SolveSucceeded);
1081            let sol = r.solution.as_ref().expect("solution");
1082            assert!(
1083                (sol.x[0] - expected[i][0]).abs() < 1e-6
1084                    && (sol.x[1] - expected[i][1]).abs() < 1e-6
1085            );
1086        }
1087    }
1088
1089    /// The pool moves parked backends between threads (HashMap behind
1090    /// the pool's mutex), so the backend must be `Send`; the pool
1091    /// itself is shared by reference from the `Sync` configure hook.
1092    #[test]
1093    fn backend_pool_is_send_sync() {
1094        fn assert_send_sync<T: Send + Sync>() {}
1095        assert_send_sync::<FeralBackendPool>();
1096        fn assert_send<T: Send>() {}
1097        assert_send::<pounce_feral::FeralSolverInterface>();
1098    }
1099
1100    /// Pooled backends on an identical-structure batch: results match
1101    /// the fresh-backend batch within solver tolerance, and the pool
1102    /// ends up holding at most one parked solver per worker thread.
1103    #[test]
1104    fn pooled_backends_match_fresh_on_identical_structure() {
1105        let k = 8;
1106        let fresh = solve_nlp_batch_parallel(batch(k), configure);
1107
1108        let pool = FeralBackendPool::serial(pounce_feral::FeralConfig::default());
1109        let pool_for_cfg = Arc::clone(&pool);
1110        let pooled = solve_nlp_batch_parallel(batch(k), move |_i, app| {
1111            let _ = app
1112                .options_mut()
1113                .set_integer_value("print_level", 0, true, false);
1114            install_pooled_serial_feral_backend(app, &pool_for_cfg);
1115        });
1116        assert!(
1117            pool.slots.lock().map(|s| !s.is_empty()).unwrap_or(false),
1118            "at least one worker must have parked its backend"
1119        );
1120        for i in 0..k {
1121            assert_eq!(pooled[i].status, ApplicationReturnStatus::SolveSucceeded);
1122            let pf = fresh[i].solution.as_ref().expect("fresh");
1123            let pp = pooled[i].solution.as_ref().expect("pooled");
1124            for j in 0..2 {
1125                assert!(
1126                    (pf.x[j] - pp.x[j]).abs() < 1e-6,
1127                    "instance {i} x[{j}]: pooled {} vs fresh {}",
1128                    pp.x[j],
1129                    pf.x[j]
1130                );
1131            }
1132        }
1133    }
1134
1135    /// Ragged convergence: mix trivially-easy instances with ones that
1136    /// start far away; per-instance iteration counts may differ and
1137    /// every result still lands at its own optimum in input order.
1138    #[test]
1139    fn ragged_iteration_counts_keep_order() {
1140        let probs: Vec<ShiftedQuad> = (0..6)
1141            .map(|i| ShiftedQuad::new(10f64.powi(i - 3), 2.0, 1.0))
1142            .collect();
1143        let expected: Vec<[f64; 2]> = probs.iter().map(|p| p.expected()).collect();
1144        let out = solve_nlp_batch_parallel(probs, configure);
1145        for (i, r) in out.iter().enumerate() {
1146            assert_eq!(r.status, ApplicationReturnStatus::SolveSucceeded);
1147            let sol = r.solution.as_ref().expect("solution");
1148            assert!(
1149                (sol.x[0] - expected[i][0]).abs() < 1e-5
1150                    && (sol.x[1] - expected[i][1]).abs() < 1e-5,
1151                "instance {i}: got {:?}, expected {:?}",
1152                sol.x,
1153                expected[i]
1154            );
1155        }
1156    }
1157}