Skip to main content

pounce_sensitivity/
algorithm_backsolver.rs

1//! `PdSensBacksolver` — `SensBacksolver` adapter over the converged
2//! `PdFullSpaceSolver` from `pounce-algorithm`.
3//!
4//! This is the Phase B.2 piece tracked in
5//! [pounce#16](https://github.com/jkitchin/pounce/issues/16): it lets
6//! `pounce-sensitivity` drive backsolves against the real converged
7//! KKT factor, replacing the synthetic [`crate::DenseLuBacksolver`]
8//! used by Phase B.1 unit tests.
9//!
10//! # Use
11//!
12//! 1. Register an `on_converged` callback on `IpoptApplication` via
13//!    [`pounce_algorithm::application::IpoptApplication::set_on_converged`].
14//! 2. Inside the callback, build a `PdSensBacksolver` from the four
15//!    handles passed in (`data`, `cq`, `nlp`, `&mut pd_solver`).
16//! 3. Hand it to [`crate::SensApplication`] / a `SensStepCalc` /
17//!    [`crate::compute_reduced_hessian`] like any other
18//!    [`SensBacksolver`].
19//!
20//! Upstream `SensSimpleBacksolver`
21//! ([`ref/Ipopt/contrib/sIPOPT/src/SensSimpleBacksolver.cpp`](../../../ref/Ipopt/contrib/sIPOPT/src/SensSimpleBacksolver.cpp))
22//! is the analogous wrapper around `IpoptCalculatedQuantities` +
23//! `PDSystemSolver` upstream.
24//!
25//! # Flat-slice ↔ `IteratesVector` mapping
26//!
27//! The full primal-dual state of pounce's IPM is the eight-block
28//! compound `(x, s, λ_c, λ_d, z_l, z_u, v_l, v_u)` (see
29//! [`pounce_algorithm::iterates_vector::IteratesVector`]). This
30//! adapter packs / unpacks the flat slices that
31//! [`crate::SensBacksolver`] takes as the concatenation
32//! `x || s || λ_c || λ_d || z_l || z_u || v_l || v_u`, mirroring
33//! upstream's `CompoundVector` layout (`IpCompoundVector.hpp`).
34//!
35//! # Reference
36//!
37//! Pirnay, H.; López-Negrete, R.; Biegler, L. T. (2012). *Optimal
38//! sensitivity based on IPOPT*. Mathematical Programming Computation,
39//! **4**(4), 307–331. DOI:
40//! [10.1007/s12532-012-0043-2](https://doi.org/10.1007/s12532-012-0043-2).
41//! Verified via Crossref on 2026-05-13.
42
43use std::cell::RefCell;
44use std::rc::Rc;
45
46use pounce_algorithm::ipopt_cq::IpoptCqHandle;
47use pounce_algorithm::ipopt_data::IpoptDataHandle;
48use pounce_algorithm::iterates_vector::{IteratesVector, IteratesVectorMut};
49use pounce_algorithm::kkt::pd_full_space_solver::PdFullSpaceSolver;
50use pounce_common::types::{Index, Number};
51use pounce_linalg::dense_vector::DenseVector;
52use pounce_nlp::ipopt_nlp::IpoptNlp;
53
54use crate::backsolver::SensBacksolver;
55
56/// Adapter from `PdFullSpaceSolver` to [`SensBacksolver`]. Holds
57/// owning clones of the four pieces of the algorithm's converged
58/// state, plus the 8-block iterate template used to allocate fresh
59/// RHS / LHS vectors.
60///
61/// The PD solver lives behind an `Rc<RefCell<…>>` because
62/// [`SensBacksolver::solve`] is `&self` but the upstream signature
63/// for `PdFullSpaceSolver::solve` is `&mut self` (it caches the
64/// last-solve dependency tags and the augsys-improved flag). The
65/// `RefCell` is single-thread-only, single-borrow, exactly matching
66/// the call pattern from `pounce-sensitivity`'s pipeline.
67///
68/// Owning (rather than borrowing) the four handles is what lets a
69/// `PdSensBacksolver` outlive the `on_converged` callback frame —
70/// required by the public `Solver` session API in `pounce-algorithm`,
71/// which retains the backsolver for repeated `parametric_step` /
72/// `kkt_solve` / `compute_reduced_hessian` calls after the IPM has
73/// returned. The data, cq, and nlp handles are already
74/// `Rc<RefCell<…>>` cheap-clone handles upstream, so this carries no
75/// allocation overhead.
76#[derive(Clone)]
77pub struct PdSensBacksolver {
78    /// Shared, interior-mutable handle to the converged PD solver.
79    /// Cloned from `PdSearchDirCalc::pd_solver_rc()` at construction.
80    pd: Rc<RefCell<PdFullSpaceSolver>>,
81    data: IpoptDataHandle,
82    cq: IpoptCqHandle,
83    nlp: Rc<RefCell<dyn IpoptNlp>>,
84    /// Block dimensions in `(x, s, y_c, y_d, z_l, z_u, v_l, v_u)` order.
85    dims: [usize; 8],
86    /// 8-block prototype used to mint fresh vectors with the same
87    /// `VectorSpace`s as the converged iterate; cloned from
88    /// `data.borrow().curr`.
89    template: IteratesVector,
90    /// Natural-units row/column scaling pair (pounce#128). The IPM's
91    /// KKT factor is held in the NLP's internally **scaled** space
92    /// (objective factor `df`, per-row constraint factors `dc` / `dd`
93    /// from `nlp_scaling_method`; scaled multipliers `ỹ = (df/dc)·y`,
94    /// `z̃ = df·z`, `ṽ = (df/dd)·v`, scaled slack `s̃ = dd·s`). The
95    /// scaled 8-block primal-dual system is the two-sided diagonal
96    /// scaling `K̃ = E K F` of the natural-units system, with
97    /// per-block entries
98    ///
99    /// ```text
100    ///        x      s        y_c      y_d     z_l/z_u   v_l/v_u
101    /// E  =   df     df/dd_i  dc_i     dd_i    df        df
102    /// F  =   1      1/dd_i   dc_i/df  dd_i/df 1/df      dd_r(j)/df
103    /// ```
104    ///
105    /// (`dd_r(j)` = the d-row scaling of the j-th finite d-bound,
106    /// through the `pd_l` / `pd_u` expansion). Hence
107    /// `K⁻¹ = F K̃⁻¹ E`: scale the RHS by `E`, back-solve against the
108    /// held factor, scale the result by `F`. Unlike a symmetric
109    /// congruence this needs no square root, so it covers a negative
110    /// `obj_scaling_factor` (maximization) and covers the z/v
111    /// bound-multiplier rows exactly (those rows admit no symmetric
112    /// diagonal: `K̃_{z,x} = df·Z·Pᵀ` but `K̃_{z,z} = X − x_L` is
113    /// unscaled). `None` ⇔ scaling inactive, identity.
114    conj: Option<Rc<ConjPair>>,
115}
116
117/// Left/right diagonal pair for the natural-units back-solve; see the
118/// `conj` field doc on [`PdSensBacksolver`]. Both vectors are
119/// flat-KKT-length, in the `x‖s‖y_c‖y_d‖z_l‖z_u‖v_l‖v_u` packing.
120struct ConjPair {
121    /// `E`: multiplied into the RHS before the scaled-space solve.
122    e: Vec<Number>,
123    /// `F`: multiplied into the solution after the scaled-space solve.
124    f: Vec<Number>,
125}
126
127impl PdSensBacksolver {
128    /// Construct from the four handles handed in by the `on_converged`
129    /// callback. Errors if `data` has no `curr` (i.e. the algorithm
130    /// never reached an iterate — should not happen on
131    /// `SolveSucceeded`) or the NLP reports scaling data inconsistent
132    /// with the converged iterate (see [`Self::natural_units_conj`]).
133    pub fn new(
134        data: &IpoptDataHandle,
135        cq: &IpoptCqHandle,
136        nlp: &Rc<RefCell<dyn IpoptNlp>>,
137        pd: Rc<RefCell<PdFullSpaceSolver>>,
138    ) -> Result<Self, String> {
139        let curr = data
140            .borrow()
141            .curr
142            .clone()
143            .ok_or_else(|| "no current iterate at convergence".to_string())?;
144        let dims = [
145            curr.x.dim() as usize,
146            curr.s.dim() as usize,
147            curr.y_c.dim() as usize,
148            curr.y_d.dim() as usize,
149            curr.z_l.dim() as usize,
150            curr.z_u.dim() as usize,
151            curr.v_l.dim() as usize,
152            curr.v_u.dim() as usize,
153        ];
154        let conj = Self::natural_units_conj(nlp, &dims)?;
155        Ok(Self {
156            pd,
157            data: Rc::clone(data),
158            cq: Rc::clone(cq),
159            nlp: Rc::clone(nlp),
160            dims,
161            template: curr,
162            conj,
163        })
164    }
165
166    /// Build the natural-units scaling pair `(E, F)` from the NLP's
167    /// effective scaling (see the field doc on [`Self::conj`]).
168    /// Returns `Ok(None)` when no scaling is active. Errors when the
169    /// NLP reports scaling data inconsistent with the converged
170    /// iterate's block dimensions (would silently corrupt every
171    /// back-solve) or a zero/non-finite `df`.
172    fn natural_units_conj(
173        nlp: &Rc<RefCell<dyn IpoptNlp>>,
174        dims: &[usize; 8],
175    ) -> Result<Option<Rc<ConjPair>>, String> {
176        let nlp_ref = nlp.borrow();
177        let df = nlp_ref.obj_scaling_factor();
178        let dc = nlp_ref.c_scale_vec();
179        let dd = nlp_ref.d_scale_vec();
180        if df == 1.0 && dc.is_none() && dd.is_none() {
181            return Ok(None);
182        }
183        // df may be negative (obj_scaling_factor < 0 means maximize);
184        // the two-sided scaling needs no square root, only df ≠ 0.
185        if !df.is_finite() || df == 0.0 {
186            return Err(format!("invalid obj_scaling_factor {df}"));
187        }
188        if let Some(v) = &dc {
189            if v.len() != dims[2] {
190                return Err(format!("c_scale length {} != y_c dim {}", v.len(), dims[2]));
191            }
192        }
193        if let Some(v) = &dd {
194            if v.len() != dims[3] || dims[1] != dims[3] {
195                return Err(format!(
196                    "d_scale length {} != y_d dim {} (s dim {})",
197                    v.len(),
198                    dims[3],
199                    dims[1]
200                ));
201            }
202        }
203        // Per-entry d-row scale for the compressed v_l / v_u blocks:
204        // entry j of v_l covers the d row pd_l.expanded_pos[j].
205        let v_row_scale = |pm: Rc<dyn pounce_linalg::matrix::Matrix>,
206                           n_v: usize,
207                           which: &str|
208         -> Result<Vec<Number>, String> {
209            let Some(dd) = &dd else {
210                return Ok(vec![1.0; n_v]);
211            };
212            if n_v == 0 {
213                return Ok(Vec::new());
214            }
215            let Some(em) = pm
216                .as_any()
217                .downcast_ref::<pounce_linalg::expansion_matrix::ExpansionMatrix>()
218            else {
219                return Err(format!("{which} is not an ExpansionMatrix"));
220            };
221            let pos = em.expanded_pos_indices();
222            if pos.len() != n_v {
223                return Err(format!(
224                    "{which} expansion length {} != {} block dim {}",
225                    pos.len(),
226                    which,
227                    n_v
228                ));
229            }
230            pos.iter()
231                .map(|&r| {
232                    dd.get(r as usize).copied().ok_or_else(|| {
233                        format!(
234                            "{which} expansion row {r} out of d_scale range {}",
235                            dd.len()
236                        )
237                    })
238                })
239                .collect()
240        };
241        let vl_dd = v_row_scale(nlp_ref.pd_l(), dims[6], "pd_l")?;
242        let vu_dd = v_row_scale(nlp_ref.pd_u(), dims[7], "pd_u")?;
243        drop(nlp_ref);
244
245        let total: usize = dims.iter().sum();
246        let mut e = Vec::with_capacity(total);
247        let mut f = Vec::with_capacity(total);
248        // x block: E = df, F = 1.
249        e.extend(std::iter::repeat_n(df, dims[0]));
250        f.extend(std::iter::repeat_n(1.0, dims[0]));
251        // s block: E = df/dd_i, F = 1/dd_i (slacks live in scaled d-space).
252        match &dd {
253            Some(v) => {
254                e.extend(v.iter().map(|&ddi| df / ddi));
255                f.extend(v.iter().map(|&ddi| 1.0 / ddi));
256            }
257            None => {
258                e.extend(std::iter::repeat_n(df, dims[1]));
259                f.extend(std::iter::repeat_n(1.0, dims[1]));
260            }
261        }
262        // y_c block: E = dc_i, F = dc_i/df.
263        match &dc {
264            Some(v) => {
265                e.extend(v.iter().copied());
266                f.extend(v.iter().map(|&dci| dci / df));
267            }
268            None => {
269                e.extend(std::iter::repeat_n(1.0, dims[2]));
270                f.extend(std::iter::repeat_n(1.0 / df, dims[2]));
271            }
272        }
273        // y_d block: E = dd_i, F = dd_i/df.
274        match &dd {
275            Some(v) => {
276                e.extend(v.iter().copied());
277                f.extend(v.iter().map(|&ddi| ddi / df));
278            }
279            None => {
280                e.extend(std::iter::repeat_n(1.0, dims[3]));
281                f.extend(std::iter::repeat_n(1.0 / df, dims[3]));
282            }
283        }
284        // z_l / z_u blocks: E = df, F = 1/df (z̃ = df·z; bounds on x
285        // are unscaled so the slack diagonal X − x_L is shared by both
286        // systems).
287        e.extend(std::iter::repeat_n(df, dims[4] + dims[5]));
288        f.extend(std::iter::repeat_n(1.0 / df, dims[4] + dims[5]));
289        // v_l / v_u blocks: E = df, F = dd_r/df (ṽ = (df/dd)·v and the
290        // slack diagonal s̃ − d̃_l = dd·(s − d_l) carries the d-row
291        // scale).
292        e.extend(std::iter::repeat_n(df, dims[6] + dims[7]));
293        f.extend(vl_dd.iter().map(|&ddr| ddr / df));
294        f.extend(vu_dd.iter().map(|&ddr| ddr / df));
295        Ok(Some(Rc::new(ConjPair { e, f })))
296    }
297
298    /// Effective objective scaling factor `df` of the converged NLP
299    /// (1.0 when no scaling is active).
300    pub fn obj_scaling_factor(&self) -> Number {
301        self.nlp.borrow().obj_scaling_factor()
302    }
303
304    /// Effective NLP scaling at convergence:
305    /// `(obj_scaling_factor, c_scale, d_scale)`. The vectors are
306    /// `None` when the corresponding block carries no row scaling.
307    pub fn nlp_scaling(&self) -> (Number, Option<Vec<Number>>, Option<Vec<Number>>) {
308        let n = self.nlp.borrow();
309        (n.obj_scaling_factor(), n.c_scale_vec(), n.d_scale_vec())
310    }
311
312    /// Inertia-correction perturbations `(δ_x, δ_s, δ_c, δ_d)` baked
313    /// into the held KKT factor (the IPM's `current_perturbation`
314    /// state at convergence). All zero ⇔ the final factorization was
315    /// unregularized and the natural-units back-solves invert the
316    /// exact KKT matrix. Nonzero ⇔ the factor carries a (scaled-space)
317    /// regularization, so sensitivity outputs — covariance in
318    /// particular — are perturbed and no longer exactly
319    /// scaling-invariant; consumers should check this before trusting
320    /// `-inv(reduced_hessian)` on ill-conditioned problems
321    /// (pounce#128 follow-up).
322    pub fn kkt_perturbations(&self) -> [Number; 4] {
323        let p = self.data.borrow().perturbations;
324        [p.delta_x, p.delta_s, p.delta_c, p.delta_d]
325    }
326
327    /// Map user-facing 0-based `g(x)` indices of parameter-pin
328    /// equality constraints to flat KKT rows **and** the pin rows'
329    /// `dc_i` scaling factors, in one pass. The KKT row of pin `i` is
330    /// `n_x + n_s + c_block_idx`, i.e. the matching `y_c` slot, found
331    /// through `IpoptNlp::full_g_to_c_block` so the c/d split's row
332    /// permutation is honored (pounce#128 follow-up: the previous
333    /// direct `n_x + n_s + g_idx` mapping silently picked wrong rows
334    /// when inequalities preceded the pins). The scales are 1.0 when
335    /// no constraint scaling is active; they relate the natural and
336    /// solver-space reduced Hessians via
337    /// `H̃_ij = (df / (dc_i·dc_j)) · H_ij`. Errors when a pin index
338    /// is out of range or refers to an inequality row.
339    pub fn pin_rows_and_c_scales(
340        &self,
341        pin_g_indices: &[Index],
342    ) -> Result<(Vec<Index>, Vec<Number>), String> {
343        let y_c_offset = (self.dims[0] + self.dims[1]) as Index;
344        let nlp = self.nlp.borrow();
345        let dc = nlp.c_scale_vec();
346        let n_full_g = nlp.n_full_g();
347        let mut rows = Vec::with_capacity(pin_g_indices.len());
348        let mut scales = Vec::with_capacity(pin_g_indices.len());
349        for &gi in pin_g_indices {
350            // n_full_g() defaults to 0 for IpoptNlp impls that don't
351            // report it; only range-check when it's meaningful.
352            if gi < 0 || (n_full_g > 0 && gi >= n_full_g) {
353                return Err(format!(
354                    "pin constraint index {gi} out of range [0, m={n_full_g})"
355                ));
356            }
357            let Some(ci) = nlp.full_g_to_c_block(gi) else {
358                return Err(format!(
359                    "pin constraint index {gi} is an inequality (not an equality row); \
360                     parameter pins must be exact equalities"
361                ));
362            };
363            rows.push(y_c_offset + ci);
364            scales.push(dc.as_ref().map(|v| v[ci as usize]).unwrap_or(1.0));
365        }
366        Ok((rows, scales))
367    }
368
369    /// KKT-row half of [`Self::pin_rows_and_c_scales`].
370    pub fn map_pin_g_to_kkt_rows(&self, pin_g_indices: &[Index]) -> Result<Vec<Index>, String> {
371        Ok(self.pin_rows_and_c_scales(pin_g_indices)?.0)
372    }
373
374    /// Scaling half of [`Self::pin_rows_and_c_scales`].
375    pub fn pin_c_scales(&self, pin_g_indices: &[Index]) -> Result<Vec<Number>, String> {
376        Ok(self.pin_rows_and_c_scales(pin_g_indices)?.1)
377    }
378
379    /// Block dimensions of the compound KKT vector at convergence, in
380    /// `(x, s, y_c, y_d, z_l, z_u, v_l, v_u)` order. Sum equals
381    /// [`SensBacksolver::dim`]. Useful when a caller needs to compute
382    /// the flat offset of a non-x block (e.g. `n_x + n_s` for the
383    /// start of the equality-multiplier `y_c` block).
384    pub fn block_dims(&self) -> [usize; 8] {
385        self.dims
386    }
387
388    /// Map a 0-based **full-g** index (user-TNLP `g(x)` order) to its
389    /// 0-based position in the equality-multiplier `y_c` block, or
390    /// `None` when the constraint is an inequality (it lives in the `d`
391    /// block, not `y_c`). Delegates to the held NLP's c/d-split map.
392    ///
393    /// Pin-row construction must route through this: the flat KKT row of
394    /// a pinned equality is `n_x + n_s + full_g_to_c_block(g)`, NOT
395    /// `n_x + n_s + g` — those differ whenever any inequality precedes
396    /// the pinned equality in `g(x)`.
397    pub fn full_g_to_c_block(&self, full_idx: Index) -> Option<Index> {
398        self.nlp.borrow().full_g_to_c_block(full_idx)
399    }
400
401    /// Cumulative block offsets: `offset(i)` is the start index of
402    /// block `i` in the flat slice.
403    fn offsets(&self) -> [usize; 9] {
404        let mut o = [0usize; 9];
405        for i in 0..8 {
406            o[i + 1] = o[i] + self.dims[i];
407        }
408        o
409    }
410
411    /// Pack a flat slice into a freshly-allocated `IteratesVectorMut`
412    /// shaped like the converged iterate.
413    fn pack(&self, flat: &[Number]) -> Result<IteratesVectorMut, ()> {
414        let mut out = self.template.make_new_zeroed();
415        let off = self.offsets();
416        let blocks: [&mut Box<dyn pounce_linalg::vector::Vector>; 8] = [
417            &mut out.x,
418            &mut out.s,
419            &mut out.y_c,
420            &mut out.y_d,
421            &mut out.z_l,
422            &mut out.z_u,
423            &mut out.v_l,
424            &mut out.v_u,
425        ];
426        for (i, blk) in blocks.into_iter().enumerate() {
427            let slice = &flat[off[i]..off[i + 1]];
428            let dv = blk.as_any_mut().downcast_mut::<DenseVector>().ok_or(())?;
429            dv.set_values(slice);
430        }
431        Ok(out)
432    }
433
434    /// Read an `IteratesVectorMut` into a flat slice. Uses
435    /// [`DenseVector::expanded_values`] rather than `values()` so
436    /// blocks that the IPM left in homogeneous-scalar form (typical
437    /// for empty z_l/z_u/v_l/v_u when the TNLP has no bounds) are
438    /// materialized rather than panicking.
439    fn unpack(&self, iv: &IteratesVectorMut, out: &mut [Number]) -> Result<(), ()> {
440        let off = self.offsets();
441        let blocks: [&Box<dyn pounce_linalg::vector::Vector>; 8] = [
442            &iv.x, &iv.s, &iv.y_c, &iv.y_d, &iv.z_l, &iv.z_u, &iv.v_l, &iv.v_u,
443        ];
444        for (i, blk) in blocks.into_iter().enumerate() {
445            let dst = &mut out[off[i]..off[i + 1]];
446            if dst.is_empty() {
447                continue;
448            }
449            let dv = (**blk).as_any().downcast_ref::<DenseVector>().ok_or(())?;
450            let ev = dv.expanded_values();
451            dst.copy_from_slice(&ev);
452        }
453        Ok(())
454    }
455}
456
457impl PdSensBacksolver {
458    /// Batched-RHS back-solve over the held factor. `rhs_flat` and
459    /// `lhs_flat` are row-major `(n_rhs, dim)` buffers. Equivalent to
460    /// looping [`SensBacksolver::solve`] over each row but reuses one
461    /// frozen `IteratesVector` for the RHS and one `IteratesVectorMut`
462    /// for the result across all `n_rhs` calls into
463    /// [`PdFullSpaceSolver::solve`]. The pack step writes into the
464    /// existing `DenseVector` storage via `Rc::get_mut` +
465    /// `set_values`, and the unpack step reads it back via `values()`
466    /// /`scalar()` — skipping the per-call 8-block `make_new_zeroed`
467    /// (Box alloc) in `pack` and the per-block `expanded_values()` Vec
468    /// alloc in `unpack` that otherwise dominate the held-factor
469    /// back-solve cost under `jax.jacrev` over a JaxProblem solve
470    /// (pounce#77 follow-up).
471    ///
472    /// The matrix and perturbation state inside `PdFullSpaceSolver`
473    /// are unchanged across calls, so each iteration hits the cached
474    /// fast path in `solve_once` (`uptodate && !pretend_singular`).
475    ///
476    /// Like [`SensBacksolver::solve`], results are in **natural
477    /// (unscaled) units** — see [`Self::solve_many_scaled_space`] for
478    /// the raw solver-space back-solve.
479    pub fn solve_many(&self, rhs_flat: &[Number], lhs_flat: &mut [Number], n_rhs: usize) -> bool {
480        match &self.conj {
481            None => self.solve_many_scaled_space(rhs_flat, lhs_flat, n_rhs),
482            Some(c) => {
483                let total = self.dim();
484                if rhs_flat.len() != n_rhs * total || lhs_flat.len() != n_rhs * total {
485                    return false;
486                }
487                let mut rhs_scaled = rhs_flat.to_vec();
488                for row in rhs_scaled.chunks_mut(total) {
489                    for (r, &ei) in row.iter_mut().zip(c.e.iter()) {
490                        *r *= ei;
491                    }
492                }
493                if !self.solve_many_scaled_space(&rhs_scaled, lhs_flat, n_rhs) {
494                    return false;
495                }
496                for row in lhs_flat.chunks_mut(total) {
497                    for (l, &fi) in row.iter_mut().zip(c.f.iter()) {
498                        *l *= fi;
499                    }
500                }
501                true
502            }
503        }
504    }
505
506    /// Batched-RHS back-solve against the held factor in the solver's
507    /// internal **scaled** space (no natural-units conjugation). Same
508    /// buffer contract as [`Self::solve_many`].
509    pub fn solve_many_scaled_space(
510        &self,
511        rhs_flat: &[Number],
512        lhs_flat: &mut [Number],
513        n_rhs: usize,
514    ) -> bool {
515        let total = self.dim();
516        if rhs_flat.len() != n_rhs * total || lhs_flat.len() != n_rhs * total {
517            return false;
518        }
519        if n_rhs == 0 {
520            return true;
521        }
522        let off = self.offsets();
523
524        // Tier 1: fully-inline flat-slice path. `PdFullSpaceSolver::
525        // solve_many_cached_flat` downcasts the slack / z / v vectors to
526        // `DenseVector` and the bound-expansion matrices to
527        // `ExpansionMatrix` once at the top, then runs Phase 1 / Phase 3
528        // as raw scatter-add / divide loops on flat slices with no dyn
529        // dispatch in the per-RHS inner loops. Returns `None` if a
530        // downcast fails (homogeneous-on-non-empty block, unusual matrix
531        // type) — we fall to Tier 2.
532        {
533            let mut pd_ref = self.pd.borrow_mut();
534            let fast_flat = pd_ref.solve_many_cached_flat(
535                &self.data, &self.cq, &self.nlp, n_rhs, rhs_flat, lhs_flat, self.dims,
536            );
537            match fast_flat {
538                Some(true) => return true,
539                Some(false) => return false,
540                None => { /* fall through to Tier 2 */ }
541            }
542        }
543
544        // Tier 2: closure-based cached-factor path. Same single
545        // back-substitution through the linsol, but Phase 1 / Phase 3
546        // go through `dyn Vector` / `dyn Matrix` ops on a per-RHS
547        // `IteratesVectorMut`. Slower than Tier 1 but correct for
548        // homogeneous DenseVectors and non-`ExpansionMatrix` bound
549        // expansions.
550        {
551            let mut pd_ref = self.pd.borrow_mut();
552            let fast = pd_ref.solve_many_cached(
553                &self.data,
554                &self.cq,
555                &self.nlp,
556                n_rhs,
557                |k, iv| {
558                    let row = &rhs_flat[k * total..(k + 1) * total];
559                    let _ = write_rhs_box(&mut iv.x, &row[off[0]..off[1]])
560                        && write_rhs_box(&mut iv.s, &row[off[1]..off[2]])
561                        && write_rhs_box(&mut iv.y_c, &row[off[2]..off[3]])
562                        && write_rhs_box(&mut iv.y_d, &row[off[3]..off[4]])
563                        && write_rhs_box(&mut iv.z_l, &row[off[4]..off[5]])
564                        && write_rhs_box(&mut iv.z_u, &row[off[5]..off[6]])
565                        && write_rhs_box(&mut iv.v_l, &row[off[6]..off[7]])
566                        && write_rhs_box(&mut iv.v_u, &row[off[7]..off[8]]);
567                },
568                |k, iv| {
569                    let row = &mut lhs_flat[k * total..(k + 1) * total];
570                    let _ = read_res_block(&*iv.x, &mut row[off[0]..off[1]])
571                        && read_res_block(&*iv.s, &mut row[off[1]..off[2]])
572                        && read_res_block(&*iv.y_c, &mut row[off[2]..off[3]])
573                        && read_res_block(&*iv.y_d, &mut row[off[3]..off[4]])
574                        && read_res_block(&*iv.z_l, &mut row[off[4]..off[5]])
575                        && read_res_block(&*iv.z_u, &mut row[off[5]..off[6]])
576                        && read_res_block(&*iv.v_l, &mut row[off[6]..off[7]])
577                        && read_res_block(&*iv.v_u, &mut row[off[7]..off[8]]);
578                },
579            );
580            match fast {
581                Some(true) => return true,
582                Some(false) => return false,
583                None => { /* fall through to per-RHS loop */ }
584            }
585        }
586
587        // Per-RHS fallback: reuse one frozen rhs and one mut sol across
588        // all n_rhs `solve` calls.
589        let rhs_mut0 = self.template.make_new_zeroed();
590        let mut rhs_iv = rhs_mut0.freeze();
591        let mut res_iv = self.template.make_new_zeroed();
592
593        let mut pd_ref = self.pd.borrow_mut();
594        for k in 0..n_rhs {
595            let rhs_row = &rhs_flat[k * total..(k + 1) * total];
596            let lhs_row = &mut lhs_flat[k * total..(k + 1) * total];
597
598            if !write_rhs_block(&mut rhs_iv.x, &rhs_row[off[0]..off[1]])
599                || !write_rhs_block(&mut rhs_iv.s, &rhs_row[off[1]..off[2]])
600                || !write_rhs_block(&mut rhs_iv.y_c, &rhs_row[off[2]..off[3]])
601                || !write_rhs_block(&mut rhs_iv.y_d, &rhs_row[off[3]..off[4]])
602                || !write_rhs_block(&mut rhs_iv.z_l, &rhs_row[off[4]..off[5]])
603                || !write_rhs_block(&mut rhs_iv.z_u, &rhs_row[off[5]..off[6]])
604                || !write_rhs_block(&mut rhs_iv.v_l, &rhs_row[off[6]..off[7]])
605                || !write_rhs_block(&mut rhs_iv.v_u, &rhs_row[off[7]..off[8]])
606            {
607                return false;
608            }
609
610            let ok = pd_ref.solve(
611                &self.data,
612                &self.cq,
613                &self.nlp,
614                1.0,
615                0.0,
616                &rhs_iv,
617                &mut res_iv,
618                /* allow_inexact = */ true,
619                /* improve_solution = */ false,
620            );
621            if !ok {
622                return false;
623            }
624
625            if !read_res_block(&*res_iv.x, &mut lhs_row[off[0]..off[1]])
626                || !read_res_block(&*res_iv.s, &mut lhs_row[off[1]..off[2]])
627                || !read_res_block(&*res_iv.y_c, &mut lhs_row[off[2]..off[3]])
628                || !read_res_block(&*res_iv.y_d, &mut lhs_row[off[3]..off[4]])
629                || !read_res_block(&*res_iv.z_l, &mut lhs_row[off[4]..off[5]])
630                || !read_res_block(&*res_iv.z_u, &mut lhs_row[off[5]..off[6]])
631                || !read_res_block(&*res_iv.v_l, &mut lhs_row[off[6]..off[7]])
632                || !read_res_block(&*res_iv.v_u, &mut lhs_row[off[7]..off[8]])
633            {
634                return false;
635            }
636        }
637        true
638    }
639}
640
641/// Write `slice` into the `DenseVector` behind `b` in place. Used by
642/// the fast path's `write_rhs` closure, where the new
643/// `PdFullSpaceSolver::solve_many_cached` API hands back an
644/// `IteratesVectorMut` (Box-backed blocks).
645fn write_rhs_box(b: &mut Box<dyn pounce_linalg::vector::Vector>, slice: &[Number]) -> bool {
646    if slice.is_empty() {
647        return true;
648    }
649    let Some(dv) = b.as_any_mut().downcast_mut::<DenseVector>() else {
650        return false;
651    };
652    dv.set_values(slice);
653    true
654}
655
656/// Write `slice` into the `DenseVector` behind `rc` in place. Returns
657/// `false` if the Rc is unexpectedly shared (would indicate a bug in
658/// `PdFullSpaceSolver::solve`'s borrow discipline — it should never
659/// `Rc::clone` from the rhs vector) or if the block is not a
660/// `DenseVector`.
661fn write_rhs_block(rc: &mut Rc<dyn pounce_linalg::vector::Vector>, slice: &[Number]) -> bool {
662    if slice.is_empty() {
663        return true;
664    }
665    let Some(v) = Rc::get_mut(rc) else {
666        return false;
667    };
668    let Some(dv) = v.as_any_mut().downcast_mut::<DenseVector>() else {
669        return false;
670    };
671    dv.set_values(slice);
672    true
673}
674
675/// Read the `DenseVector` behind `blk` into `dst`. Handles the
676/// homogeneous case (empty z/v blocks for a TNLP with no bounds) by
677/// broadcasting the scalar rather than calling `expanded_values()`,
678/// which would allocate a fresh `Vec<Number>` every call.
679fn read_res_block(blk: &dyn pounce_linalg::vector::Vector, dst: &mut [Number]) -> bool {
680    if dst.is_empty() {
681        return true;
682    }
683    let Some(dv) = blk.as_any().downcast_ref::<DenseVector>() else {
684        return false;
685    };
686    if dv.is_homogeneous() {
687        let s = dv.scalar();
688        for x in dst.iter_mut() {
689            *x = s;
690        }
691    } else {
692        dst.copy_from_slice(dv.values());
693    }
694    true
695}
696
697impl PdSensBacksolver {
698    /// Single-RHS back-solve against the held factor in the solver's
699    /// internal **scaled** space (no natural-units conjugation). This
700    /// is the value [`SensBacksolver::solve`] returned before
701    /// pounce#128; kept for callers that want the raw factor.
702    pub fn solve_scaled_space(&self, rhs: &[Number], lhs: &mut [Number]) -> bool {
703        let total = self.dim();
704        if rhs.len() != total || lhs.len() != total {
705            return false;
706        }
707        // Pack rhs into block form.
708        let rhs_mut = match self.pack(rhs) {
709            Ok(v) => v,
710            Err(()) => return false,
711        };
712        let rhs_iv = rhs_mut.freeze();
713        // Fresh result slot, zeroed.
714        let mut res_iv = self.template.make_new_zeroed();
715
716        // K · lhs = rhs   ⇒   solve(α=1, β=0, rhs, res) writes
717        // res = K⁻¹ · rhs.
718        //
719        // `allow_inexact=true` mirrors upstream sIPOPT's
720        // `SensSimpleBacksolver`: skip `PdFullSpaceSolver`'s iterative-
721        // refinement loop and accept the first back-solve against the
722        // held factor. The IPM-level refinement (`min_refinement_steps
723        // = 1`, residual_ratio_max = 1e-10`) is there to clean up
724        // numerical noise during forward IPM steps; for the held-factor
725        // back-solve used by sens / JaxProblem bwd, it ~doubles the
726        // per-call cost and produces gains that are below `tol`. Under
727        // `jax.jacrev` over a JaxProblem solve this dominates the wall
728        // time at moderate `n+m` (pounce#77 follow-up).
729        let ok = {
730            let mut pd_ref = self.pd.borrow_mut();
731            pd_ref.solve(
732                &self.data,
733                &self.cq,
734                &self.nlp,
735                1.0,
736                0.0,
737                &rhs_iv,
738                &mut res_iv,
739                /* allow_inexact = */ true,
740                /* improve_solution = */ false,
741            )
742        };
743        if !ok {
744            return false;
745        }
746        self.unpack(&res_iv, lhs).is_ok()
747    }
748}
749
750impl SensBacksolver for PdSensBacksolver {
751    fn dim(&self) -> usize {
752        self.dims.iter().sum()
753    }
754
755    /// Solve `K · lhs = rhs` against the converged factor, in
756    /// **natural (unscaled) units** (pounce#128): when the NLP carries
757    /// active scaling (`nlp_scaling_method`, `obj_scaling_factor`,
758    /// user scaling) the RHS is pre-multiplied by `E` and the result
759    /// post-multiplied by `F` (see the `conj` field doc), so
760    /// `lhs = K_natural⁻¹ rhs` for **all eight blocks** — including
761    /// the z/v bound-multiplier rows. Use
762    /// [`Self::solve_scaled_space`] for the raw factor.
763    fn solve(&self, rhs: &[Number], lhs: &mut [Number]) -> bool {
764        match &self.conj {
765            None => self.solve_scaled_space(rhs, lhs),
766            Some(c) => {
767                let total = self.dim();
768                if rhs.len() != total || lhs.len() != total {
769                    return false;
770                }
771                let rhs_scaled: Vec<Number> =
772                    rhs.iter().zip(c.e.iter()).map(|(&r, &ei)| r * ei).collect();
773                if !self.solve_scaled_space(&rhs_scaled, lhs) {
774                    return false;
775                }
776                for (l, &fi) in lhs.iter_mut().zip(c.f.iter()) {
777                    *l *= fi;
778                }
779                true
780            }
781        }
782    }
783}