pounce_sensitivity/algorithm_backsolver.rs
1//! `PdSensBacksolver` — `SensBacksolver` adapter over the converged
2//! `PdFullSpaceSolver` from `pounce-algorithm`.
3//!
4//! This is the Phase B.2 piece tracked in
5//! [pounce#16](https://github.com/jkitchin/pounce/issues/16): it lets
6//! `pounce-sensitivity` drive backsolves against the real converged
7//! KKT factor, replacing the synthetic [`crate::DenseLuBacksolver`]
8//! used by Phase B.1 unit tests.
9//!
10//! # Use
11//!
12//! 1. Register an `on_converged` callback on `IpoptApplication` via
13//! [`pounce_algorithm::application::IpoptApplication::set_on_converged`].
14//! 2. Inside the callback, build a `PdSensBacksolver` from the four
15//! handles passed in (`data`, `cq`, `nlp`, `&mut pd_solver`).
16//! 3. Hand it to [`crate::SensApplication`] / a `SensStepCalc` /
17//! [`crate::compute_reduced_hessian`] like any other
18//! [`SensBacksolver`].
19//!
20//! Upstream `SensSimpleBacksolver`
21//! ([`ref/Ipopt/contrib/sIPOPT/src/SensSimpleBacksolver.cpp`](../../../ref/Ipopt/contrib/sIPOPT/src/SensSimpleBacksolver.cpp))
22//! is the analogous wrapper around `IpoptCalculatedQuantities` +
23//! `PDSystemSolver` upstream.
24//!
25//! # Flat-slice ↔ `IteratesVector` mapping
26//!
27//! The full primal-dual state of pounce's IPM is the eight-block
28//! compound `(x, s, λ_c, λ_d, z_l, z_u, v_l, v_u)` (see
29//! [`pounce_algorithm::iterates_vector::IteratesVector`]). This
30//! adapter packs / unpacks the flat slices that
31//! [`crate::SensBacksolver`] takes as the concatenation
32//! `x || s || λ_c || λ_d || z_l || z_u || v_l || v_u`, mirroring
33//! upstream's `CompoundVector` layout (`IpCompoundVector.hpp`).
34//!
35//! # Reference
36//!
37//! Pirnay, H.; López-Negrete, R.; Biegler, L. T. (2012). *Optimal
38//! sensitivity based on IPOPT*. Mathematical Programming Computation,
39//! **4**(4), 307–331. DOI:
40//! [10.1007/s12532-012-0043-2](https://doi.org/10.1007/s12532-012-0043-2).
41//! Verified via Crossref on 2026-05-13.
42
43use std::cell::RefCell;
44use std::rc::Rc;
45
46use pounce_algorithm::ipopt_cq::IpoptCqHandle;
47use pounce_algorithm::ipopt_data::IpoptDataHandle;
48use pounce_algorithm::iterates_vector::{IteratesVector, IteratesVectorMut};
49use pounce_algorithm::kkt::pd_full_space_solver::PdFullSpaceSolver;
50use pounce_common::types::{Index, Number};
51use pounce_linalg::dense_vector::DenseVector;
52use pounce_nlp::ipopt_nlp::IpoptNlp;
53
54use crate::backsolver::SensBacksolver;
55
56/// Adapter from `PdFullSpaceSolver` to [`SensBacksolver`]. Holds
57/// owning clones of the four pieces of the algorithm's converged
58/// state, plus the 8-block iterate template used to allocate fresh
59/// RHS / LHS vectors.
60///
61/// The PD solver lives behind an `Rc<RefCell<…>>` because
62/// [`SensBacksolver::solve`] is `&self` but the upstream signature
63/// for `PdFullSpaceSolver::solve` is `&mut self` (it caches the
64/// last-solve dependency tags and the augsys-improved flag). The
65/// `RefCell` is single-thread-only, single-borrow, exactly matching
66/// the call pattern from `pounce-sensitivity`'s pipeline.
67///
68/// Owning (rather than borrowing) the four handles is what lets a
69/// `PdSensBacksolver` outlive the `on_converged` callback frame —
70/// required by the public `Solver` session API in `pounce-algorithm`,
71/// which retains the backsolver for repeated `parametric_step` /
72/// `kkt_solve` / `compute_reduced_hessian` calls after the IPM has
73/// returned. The data, cq, and nlp handles are already
74/// `Rc<RefCell<…>>` cheap-clone handles upstream, so this carries no
75/// allocation overhead.
76#[derive(Clone)]
77pub struct PdSensBacksolver {
78 /// Shared, interior-mutable handle to the converged PD solver.
79 /// Cloned from `PdSearchDirCalc::pd_solver_rc()` at construction.
80 pd: Rc<RefCell<PdFullSpaceSolver>>,
81 data: IpoptDataHandle,
82 cq: IpoptCqHandle,
83 nlp: Rc<RefCell<dyn IpoptNlp>>,
84 /// Block dimensions in `(x, s, y_c, y_d, z_l, z_u, v_l, v_u)` order.
85 dims: [usize; 8],
86 /// 8-block prototype used to mint fresh vectors with the same
87 /// `VectorSpace`s as the converged iterate; cloned from
88 /// `data.borrow().curr`.
89 template: IteratesVector,
90 /// Natural-units row/column scaling pair (pounce#128). The IPM's
91 /// KKT factor is held in the NLP's internally **scaled** space
92 /// (objective factor `df`, per-row constraint factors `dc` / `dd`
93 /// from `nlp_scaling_method`; scaled multipliers `ỹ = (df/dc)·y`,
94 /// `z̃ = df·z`, `ṽ = (df/dd)·v`, scaled slack `s̃ = dd·s`). The
95 /// scaled 8-block primal-dual system is the two-sided diagonal
96 /// scaling `K̃ = E K F` of the natural-units system, with
97 /// per-block entries
98 ///
99 /// ```text
100 /// x s y_c y_d z_l/z_u v_l/v_u
101 /// E = df df/dd_i dc_i dd_i df df
102 /// F = 1 1/dd_i dc_i/df dd_i/df 1/df dd_r(j)/df
103 /// ```
104 ///
105 /// (`dd_r(j)` = the d-row scaling of the j-th finite d-bound,
106 /// through the `pd_l` / `pd_u` expansion). Hence
107 /// `K⁻¹ = F K̃⁻¹ E`: scale the RHS by `E`, back-solve against the
108 /// held factor, scale the result by `F`. Unlike a symmetric
109 /// congruence this needs no square root, so it covers a negative
110 /// `obj_scaling_factor` (maximization) and covers the z/v
111 /// bound-multiplier rows exactly (those rows admit no symmetric
112 /// diagonal: `K̃_{z,x} = df·Z·Pᵀ` but `K̃_{z,z} = X − x_L` is
113 /// unscaled). `None` ⇔ scaling inactive, identity.
114 conj: Option<Rc<ConjPair>>,
115}
116
117/// Left/right diagonal pair for the natural-units back-solve; see the
118/// `conj` field doc on [`PdSensBacksolver`]. Both vectors are
119/// flat-KKT-length, in the `x‖s‖y_c‖y_d‖z_l‖z_u‖v_l‖v_u` packing.
120struct ConjPair {
121 /// `E`: multiplied into the RHS before the scaled-space solve.
122 e: Vec<Number>,
123 /// `F`: multiplied into the solution after the scaled-space solve.
124 f: Vec<Number>,
125}
126
127impl PdSensBacksolver {
128 /// Construct from the four handles handed in by the `on_converged`
129 /// callback. Errors if `data` has no `curr` (i.e. the algorithm
130 /// never reached an iterate — should not happen on
131 /// `SolveSucceeded`) or the NLP reports scaling data inconsistent
132 /// with the converged iterate (see [`Self::natural_units_conj`]).
133 pub fn new(
134 data: &IpoptDataHandle,
135 cq: &IpoptCqHandle,
136 nlp: &Rc<RefCell<dyn IpoptNlp>>,
137 pd: Rc<RefCell<PdFullSpaceSolver>>,
138 ) -> Result<Self, String> {
139 let curr = data
140 .borrow()
141 .curr
142 .clone()
143 .ok_or_else(|| "no current iterate at convergence".to_string())?;
144 let dims = [
145 curr.x.dim() as usize,
146 curr.s.dim() as usize,
147 curr.y_c.dim() as usize,
148 curr.y_d.dim() as usize,
149 curr.z_l.dim() as usize,
150 curr.z_u.dim() as usize,
151 curr.v_l.dim() as usize,
152 curr.v_u.dim() as usize,
153 ];
154 let conj = Self::natural_units_conj(nlp, &dims)?;
155 Ok(Self {
156 pd,
157 data: Rc::clone(data),
158 cq: Rc::clone(cq),
159 nlp: Rc::clone(nlp),
160 dims,
161 template: curr,
162 conj,
163 })
164 }
165
166 /// Build the natural-units scaling pair `(E, F)` from the NLP's
167 /// effective scaling (see the field doc on [`Self::conj`]).
168 /// Returns `Ok(None)` when no scaling is active. Errors when the
169 /// NLP reports scaling data inconsistent with the converged
170 /// iterate's block dimensions (would silently corrupt every
171 /// back-solve) or a zero/non-finite `df`.
172 fn natural_units_conj(
173 nlp: &Rc<RefCell<dyn IpoptNlp>>,
174 dims: &[usize; 8],
175 ) -> Result<Option<Rc<ConjPair>>, String> {
176 let nlp_ref = nlp.borrow();
177 let df = nlp_ref.obj_scaling_factor();
178 let dc = nlp_ref.c_scale_vec();
179 let dd = nlp_ref.d_scale_vec();
180 if df == 1.0 && dc.is_none() && dd.is_none() {
181 return Ok(None);
182 }
183 // df may be negative (obj_scaling_factor < 0 means maximize);
184 // the two-sided scaling needs no square root, only df ≠ 0.
185 if !df.is_finite() || df == 0.0 {
186 return Err(format!("invalid obj_scaling_factor {df}"));
187 }
188 if let Some(v) = &dc {
189 if v.len() != dims[2] {
190 return Err(format!("c_scale length {} != y_c dim {}", v.len(), dims[2]));
191 }
192 }
193 if let Some(v) = &dd {
194 if v.len() != dims[3] || dims[1] != dims[3] {
195 return Err(format!(
196 "d_scale length {} != y_d dim {} (s dim {})",
197 v.len(),
198 dims[3],
199 dims[1]
200 ));
201 }
202 }
203 // Per-entry d-row scale for the compressed v_l / v_u blocks:
204 // entry j of v_l covers the d row pd_l.expanded_pos[j].
205 let v_row_scale = |pm: Rc<dyn pounce_linalg::matrix::Matrix>,
206 n_v: usize,
207 which: &str|
208 -> Result<Vec<Number>, String> {
209 let Some(dd) = &dd else {
210 return Ok(vec![1.0; n_v]);
211 };
212 if n_v == 0 {
213 return Ok(Vec::new());
214 }
215 let Some(em) = pm
216 .as_any()
217 .downcast_ref::<pounce_linalg::expansion_matrix::ExpansionMatrix>()
218 else {
219 return Err(format!("{which} is not an ExpansionMatrix"));
220 };
221 let pos = em.expanded_pos_indices();
222 if pos.len() != n_v {
223 return Err(format!(
224 "{which} expansion length {} != {} block dim {}",
225 pos.len(),
226 which,
227 n_v
228 ));
229 }
230 pos.iter()
231 .map(|&r| {
232 dd.get(r as usize).copied().ok_or_else(|| {
233 format!(
234 "{which} expansion row {r} out of d_scale range {}",
235 dd.len()
236 )
237 })
238 })
239 .collect()
240 };
241 let vl_dd = v_row_scale(nlp_ref.pd_l(), dims[6], "pd_l")?;
242 let vu_dd = v_row_scale(nlp_ref.pd_u(), dims[7], "pd_u")?;
243 drop(nlp_ref);
244
245 let total: usize = dims.iter().sum();
246 let mut e = Vec::with_capacity(total);
247 let mut f = Vec::with_capacity(total);
248 // x block: E = df, F = 1.
249 e.extend(std::iter::repeat_n(df, dims[0]));
250 f.extend(std::iter::repeat_n(1.0, dims[0]));
251 // s block: E = df/dd_i, F = 1/dd_i (slacks live in scaled d-space).
252 match &dd {
253 Some(v) => {
254 e.extend(v.iter().map(|&ddi| df / ddi));
255 f.extend(v.iter().map(|&ddi| 1.0 / ddi));
256 }
257 None => {
258 e.extend(std::iter::repeat_n(df, dims[1]));
259 f.extend(std::iter::repeat_n(1.0, dims[1]));
260 }
261 }
262 // y_c block: E = dc_i, F = dc_i/df.
263 match &dc {
264 Some(v) => {
265 e.extend(v.iter().copied());
266 f.extend(v.iter().map(|&dci| dci / df));
267 }
268 None => {
269 e.extend(std::iter::repeat_n(1.0, dims[2]));
270 f.extend(std::iter::repeat_n(1.0 / df, dims[2]));
271 }
272 }
273 // y_d block: E = dd_i, F = dd_i/df.
274 match &dd {
275 Some(v) => {
276 e.extend(v.iter().copied());
277 f.extend(v.iter().map(|&ddi| ddi / df));
278 }
279 None => {
280 e.extend(std::iter::repeat_n(1.0, dims[3]));
281 f.extend(std::iter::repeat_n(1.0 / df, dims[3]));
282 }
283 }
284 // z_l / z_u blocks: E = df, F = 1/df (z̃ = df·z; bounds on x
285 // are unscaled so the slack diagonal X − x_L is shared by both
286 // systems).
287 e.extend(std::iter::repeat_n(df, dims[4] + dims[5]));
288 f.extend(std::iter::repeat_n(1.0 / df, dims[4] + dims[5]));
289 // v_l / v_u blocks: E = df, F = dd_r/df (ṽ = (df/dd)·v and the
290 // slack diagonal s̃ − d̃_l = dd·(s − d_l) carries the d-row
291 // scale).
292 e.extend(std::iter::repeat_n(df, dims[6] + dims[7]));
293 f.extend(vl_dd.iter().map(|&ddr| ddr / df));
294 f.extend(vu_dd.iter().map(|&ddr| ddr / df));
295 Ok(Some(Rc::new(ConjPair { e, f })))
296 }
297
298 /// Effective objective scaling factor `df` of the converged NLP
299 /// (1.0 when no scaling is active).
300 pub fn obj_scaling_factor(&self) -> Number {
301 self.nlp.borrow().obj_scaling_factor()
302 }
303
304 /// Effective NLP scaling at convergence:
305 /// `(obj_scaling_factor, c_scale, d_scale)`. The vectors are
306 /// `None` when the corresponding block carries no row scaling.
307 pub fn nlp_scaling(&self) -> (Number, Option<Vec<Number>>, Option<Vec<Number>>) {
308 let n = self.nlp.borrow();
309 (n.obj_scaling_factor(), n.c_scale_vec(), n.d_scale_vec())
310 }
311
312 /// Inertia-correction perturbations `(δ_x, δ_s, δ_c, δ_d)` baked
313 /// into the held KKT factor (the IPM's `current_perturbation`
314 /// state at convergence). All zero ⇔ the final factorization was
315 /// unregularized and the natural-units back-solves invert the
316 /// exact KKT matrix. Nonzero ⇔ the factor carries a (scaled-space)
317 /// regularization, so sensitivity outputs — covariance in
318 /// particular — are perturbed and no longer exactly
319 /// scaling-invariant; consumers should check this before trusting
320 /// `-inv(reduced_hessian)` on ill-conditioned problems
321 /// (pounce#128 follow-up).
322 pub fn kkt_perturbations(&self) -> [Number; 4] {
323 let p = self.data.borrow().perturbations;
324 [p.delta_x, p.delta_s, p.delta_c, p.delta_d]
325 }
326
327 /// Map user-facing 0-based `g(x)` indices of parameter-pin
328 /// equality constraints to flat KKT rows **and** the pin rows'
329 /// `dc_i` scaling factors, in one pass. The KKT row of pin `i` is
330 /// `n_x + n_s + c_block_idx`, i.e. the matching `y_c` slot, found
331 /// through `IpoptNlp::full_g_to_c_block` so the c/d split's row
332 /// permutation is honored (pounce#128 follow-up: the previous
333 /// direct `n_x + n_s + g_idx` mapping silently picked wrong rows
334 /// when inequalities preceded the pins). The scales are 1.0 when
335 /// no constraint scaling is active; they relate the natural and
336 /// solver-space reduced Hessians via
337 /// `H̃_ij = (df / (dc_i·dc_j)) · H_ij`. Errors when a pin index
338 /// is out of range or refers to an inequality row.
339 pub fn pin_rows_and_c_scales(
340 &self,
341 pin_g_indices: &[Index],
342 ) -> Result<(Vec<Index>, Vec<Number>), String> {
343 let y_c_offset = (self.dims[0] + self.dims[1]) as Index;
344 let nlp = self.nlp.borrow();
345 let dc = nlp.c_scale_vec();
346 let n_full_g = nlp.n_full_g();
347 let mut rows = Vec::with_capacity(pin_g_indices.len());
348 let mut scales = Vec::with_capacity(pin_g_indices.len());
349 for &gi in pin_g_indices {
350 // n_full_g() defaults to 0 for IpoptNlp impls that don't
351 // report it; only range-check when it's meaningful.
352 if gi < 0 || (n_full_g > 0 && gi >= n_full_g) {
353 return Err(format!(
354 "pin constraint index {gi} out of range [0, m={n_full_g})"
355 ));
356 }
357 let Some(ci) = nlp.full_g_to_c_block(gi) else {
358 return Err(format!(
359 "pin constraint index {gi} is an inequality (not an equality row); \
360 parameter pins must be exact equalities"
361 ));
362 };
363 rows.push(y_c_offset + ci);
364 scales.push(dc.as_ref().map(|v| v[ci as usize]).unwrap_or(1.0));
365 }
366 Ok((rows, scales))
367 }
368
369 /// KKT-row half of [`Self::pin_rows_and_c_scales`].
370 pub fn map_pin_g_to_kkt_rows(&self, pin_g_indices: &[Index]) -> Result<Vec<Index>, String> {
371 Ok(self.pin_rows_and_c_scales(pin_g_indices)?.0)
372 }
373
374 /// Scaling half of [`Self::pin_rows_and_c_scales`].
375 pub fn pin_c_scales(&self, pin_g_indices: &[Index]) -> Result<Vec<Number>, String> {
376 Ok(self.pin_rows_and_c_scales(pin_g_indices)?.1)
377 }
378
379 /// Block dimensions of the compound KKT vector at convergence, in
380 /// `(x, s, y_c, y_d, z_l, z_u, v_l, v_u)` order. Sum equals
381 /// [`SensBacksolver::dim`]. Useful when a caller needs to compute
382 /// the flat offset of a non-x block (e.g. `n_x + n_s` for the
383 /// start of the equality-multiplier `y_c` block).
384 pub fn block_dims(&self) -> [usize; 8] {
385 self.dims
386 }
387
388 /// Map a 0-based **full-g** index (user-TNLP `g(x)` order) to its
389 /// 0-based position in the equality-multiplier `y_c` block, or
390 /// `None` when the constraint is an inequality (it lives in the `d`
391 /// block, not `y_c`). Delegates to the held NLP's c/d-split map.
392 ///
393 /// Pin-row construction must route through this: the flat KKT row of
394 /// a pinned equality is `n_x + n_s + full_g_to_c_block(g)`, NOT
395 /// `n_x + n_s + g` — those differ whenever any inequality precedes
396 /// the pinned equality in `g(x)`.
397 pub fn full_g_to_c_block(&self, full_idx: Index) -> Option<Index> {
398 self.nlp.borrow().full_g_to_c_block(full_idx)
399 }
400
401 /// Cumulative block offsets: `offset(i)` is the start index of
402 /// block `i` in the flat slice.
403 fn offsets(&self) -> [usize; 9] {
404 let mut o = [0usize; 9];
405 for i in 0..8 {
406 o[i + 1] = o[i] + self.dims[i];
407 }
408 o
409 }
410
411 /// Pack a flat slice into a freshly-allocated `IteratesVectorMut`
412 /// shaped like the converged iterate.
413 fn pack(&self, flat: &[Number]) -> Result<IteratesVectorMut, ()> {
414 let mut out = self.template.make_new_zeroed();
415 let off = self.offsets();
416 let blocks: [&mut Box<dyn pounce_linalg::vector::Vector>; 8] = [
417 &mut out.x,
418 &mut out.s,
419 &mut out.y_c,
420 &mut out.y_d,
421 &mut out.z_l,
422 &mut out.z_u,
423 &mut out.v_l,
424 &mut out.v_u,
425 ];
426 for (i, blk) in blocks.into_iter().enumerate() {
427 let slice = &flat[off[i]..off[i + 1]];
428 let dv = blk.as_any_mut().downcast_mut::<DenseVector>().ok_or(())?;
429 dv.set_values(slice);
430 }
431 Ok(out)
432 }
433
434 /// Read an `IteratesVectorMut` into a flat slice. Uses
435 /// [`DenseVector::expanded_values`] rather than `values()` so
436 /// blocks that the IPM left in homogeneous-scalar form (typical
437 /// for empty z_l/z_u/v_l/v_u when the TNLP has no bounds) are
438 /// materialized rather than panicking.
439 fn unpack(&self, iv: &IteratesVectorMut, out: &mut [Number]) -> Result<(), ()> {
440 let off = self.offsets();
441 let blocks: [&Box<dyn pounce_linalg::vector::Vector>; 8] = [
442 &iv.x, &iv.s, &iv.y_c, &iv.y_d, &iv.z_l, &iv.z_u, &iv.v_l, &iv.v_u,
443 ];
444 for (i, blk) in blocks.into_iter().enumerate() {
445 let dst = &mut out[off[i]..off[i + 1]];
446 if dst.is_empty() {
447 continue;
448 }
449 let dv = (**blk).as_any().downcast_ref::<DenseVector>().ok_or(())?;
450 let ev = dv.expanded_values();
451 dst.copy_from_slice(&ev);
452 }
453 Ok(())
454 }
455}
456
457impl PdSensBacksolver {
458 /// Batched-RHS back-solve over the held factor. `rhs_flat` and
459 /// `lhs_flat` are row-major `(n_rhs, dim)` buffers. Equivalent to
460 /// looping [`SensBacksolver::solve`] over each row but reuses one
461 /// frozen `IteratesVector` for the RHS and one `IteratesVectorMut`
462 /// for the result across all `n_rhs` calls into
463 /// [`PdFullSpaceSolver::solve`]. The pack step writes into the
464 /// existing `DenseVector` storage via `Rc::get_mut` +
465 /// `set_values`, and the unpack step reads it back via `values()`
466 /// /`scalar()` — skipping the per-call 8-block `make_new_zeroed`
467 /// (Box alloc) in `pack` and the per-block `expanded_values()` Vec
468 /// alloc in `unpack` that otherwise dominate the held-factor
469 /// back-solve cost under `jax.jacrev` over a JaxProblem solve
470 /// (pounce#77 follow-up).
471 ///
472 /// The matrix and perturbation state inside `PdFullSpaceSolver`
473 /// are unchanged across calls, so each iteration hits the cached
474 /// fast path in `solve_once` (`uptodate && !pretend_singular`).
475 ///
476 /// Like [`SensBacksolver::solve`], results are in **natural
477 /// (unscaled) units** — see [`Self::solve_many_scaled_space`] for
478 /// the raw solver-space back-solve.
479 pub fn solve_many(&self, rhs_flat: &[Number], lhs_flat: &mut [Number], n_rhs: usize) -> bool {
480 match &self.conj {
481 None => self.solve_many_scaled_space(rhs_flat, lhs_flat, n_rhs),
482 Some(c) => {
483 let total = self.dim();
484 if rhs_flat.len() != n_rhs * total || lhs_flat.len() != n_rhs * total {
485 return false;
486 }
487 let mut rhs_scaled = rhs_flat.to_vec();
488 for row in rhs_scaled.chunks_mut(total) {
489 for (r, &ei) in row.iter_mut().zip(c.e.iter()) {
490 *r *= ei;
491 }
492 }
493 if !self.solve_many_scaled_space(&rhs_scaled, lhs_flat, n_rhs) {
494 return false;
495 }
496 for row in lhs_flat.chunks_mut(total) {
497 for (l, &fi) in row.iter_mut().zip(c.f.iter()) {
498 *l *= fi;
499 }
500 }
501 true
502 }
503 }
504 }
505
506 /// Batched-RHS back-solve against the held factor in the solver's
507 /// internal **scaled** space (no natural-units conjugation). Same
508 /// buffer contract as [`Self::solve_many`].
509 pub fn solve_many_scaled_space(
510 &self,
511 rhs_flat: &[Number],
512 lhs_flat: &mut [Number],
513 n_rhs: usize,
514 ) -> bool {
515 let total = self.dim();
516 if rhs_flat.len() != n_rhs * total || lhs_flat.len() != n_rhs * total {
517 return false;
518 }
519 if n_rhs == 0 {
520 return true;
521 }
522 let off = self.offsets();
523
524 // Tier 1: fully-inline flat-slice path. `PdFullSpaceSolver::
525 // solve_many_cached_flat` downcasts the slack / z / v vectors to
526 // `DenseVector` and the bound-expansion matrices to
527 // `ExpansionMatrix` once at the top, then runs Phase 1 / Phase 3
528 // as raw scatter-add / divide loops on flat slices with no dyn
529 // dispatch in the per-RHS inner loops. Returns `None` if a
530 // downcast fails (homogeneous-on-non-empty block, unusual matrix
531 // type) — we fall to Tier 2.
532 {
533 let mut pd_ref = self.pd.borrow_mut();
534 let fast_flat = pd_ref.solve_many_cached_flat(
535 &self.data, &self.cq, &self.nlp, n_rhs, rhs_flat, lhs_flat, self.dims,
536 );
537 match fast_flat {
538 Some(true) => return true,
539 Some(false) => return false,
540 None => { /* fall through to Tier 2 */ }
541 }
542 }
543
544 // Tier 2: closure-based cached-factor path. Same single
545 // back-substitution through the linsol, but Phase 1 / Phase 3
546 // go through `dyn Vector` / `dyn Matrix` ops on a per-RHS
547 // `IteratesVectorMut`. Slower than Tier 1 but correct for
548 // homogeneous DenseVectors and non-`ExpansionMatrix` bound
549 // expansions.
550 {
551 let mut pd_ref = self.pd.borrow_mut();
552 let fast = pd_ref.solve_many_cached(
553 &self.data,
554 &self.cq,
555 &self.nlp,
556 n_rhs,
557 |k, iv| {
558 let row = &rhs_flat[k * total..(k + 1) * total];
559 let _ = write_rhs_box(&mut iv.x, &row[off[0]..off[1]])
560 && write_rhs_box(&mut iv.s, &row[off[1]..off[2]])
561 && write_rhs_box(&mut iv.y_c, &row[off[2]..off[3]])
562 && write_rhs_box(&mut iv.y_d, &row[off[3]..off[4]])
563 && write_rhs_box(&mut iv.z_l, &row[off[4]..off[5]])
564 && write_rhs_box(&mut iv.z_u, &row[off[5]..off[6]])
565 && write_rhs_box(&mut iv.v_l, &row[off[6]..off[7]])
566 && write_rhs_box(&mut iv.v_u, &row[off[7]..off[8]]);
567 },
568 |k, iv| {
569 let row = &mut lhs_flat[k * total..(k + 1) * total];
570 let _ = read_res_block(&*iv.x, &mut row[off[0]..off[1]])
571 && read_res_block(&*iv.s, &mut row[off[1]..off[2]])
572 && read_res_block(&*iv.y_c, &mut row[off[2]..off[3]])
573 && read_res_block(&*iv.y_d, &mut row[off[3]..off[4]])
574 && read_res_block(&*iv.z_l, &mut row[off[4]..off[5]])
575 && read_res_block(&*iv.z_u, &mut row[off[5]..off[6]])
576 && read_res_block(&*iv.v_l, &mut row[off[6]..off[7]])
577 && read_res_block(&*iv.v_u, &mut row[off[7]..off[8]]);
578 },
579 );
580 match fast {
581 Some(true) => return true,
582 Some(false) => return false,
583 None => { /* fall through to per-RHS loop */ }
584 }
585 }
586
587 // Per-RHS fallback: reuse one frozen rhs and one mut sol across
588 // all n_rhs `solve` calls.
589 let rhs_mut0 = self.template.make_new_zeroed();
590 let mut rhs_iv = rhs_mut0.freeze();
591 let mut res_iv = self.template.make_new_zeroed();
592
593 let mut pd_ref = self.pd.borrow_mut();
594 for k in 0..n_rhs {
595 let rhs_row = &rhs_flat[k * total..(k + 1) * total];
596 let lhs_row = &mut lhs_flat[k * total..(k + 1) * total];
597
598 if !write_rhs_block(&mut rhs_iv.x, &rhs_row[off[0]..off[1]])
599 || !write_rhs_block(&mut rhs_iv.s, &rhs_row[off[1]..off[2]])
600 || !write_rhs_block(&mut rhs_iv.y_c, &rhs_row[off[2]..off[3]])
601 || !write_rhs_block(&mut rhs_iv.y_d, &rhs_row[off[3]..off[4]])
602 || !write_rhs_block(&mut rhs_iv.z_l, &rhs_row[off[4]..off[5]])
603 || !write_rhs_block(&mut rhs_iv.z_u, &rhs_row[off[5]..off[6]])
604 || !write_rhs_block(&mut rhs_iv.v_l, &rhs_row[off[6]..off[7]])
605 || !write_rhs_block(&mut rhs_iv.v_u, &rhs_row[off[7]..off[8]])
606 {
607 return false;
608 }
609
610 let ok = pd_ref.solve(
611 &self.data,
612 &self.cq,
613 &self.nlp,
614 1.0,
615 0.0,
616 &rhs_iv,
617 &mut res_iv,
618 /* allow_inexact = */ true,
619 /* improve_solution = */ false,
620 );
621 if !ok {
622 return false;
623 }
624
625 if !read_res_block(&*res_iv.x, &mut lhs_row[off[0]..off[1]])
626 || !read_res_block(&*res_iv.s, &mut lhs_row[off[1]..off[2]])
627 || !read_res_block(&*res_iv.y_c, &mut lhs_row[off[2]..off[3]])
628 || !read_res_block(&*res_iv.y_d, &mut lhs_row[off[3]..off[4]])
629 || !read_res_block(&*res_iv.z_l, &mut lhs_row[off[4]..off[5]])
630 || !read_res_block(&*res_iv.z_u, &mut lhs_row[off[5]..off[6]])
631 || !read_res_block(&*res_iv.v_l, &mut lhs_row[off[6]..off[7]])
632 || !read_res_block(&*res_iv.v_u, &mut lhs_row[off[7]..off[8]])
633 {
634 return false;
635 }
636 }
637 true
638 }
639}
640
641/// Write `slice` into the `DenseVector` behind `b` in place. Used by
642/// the fast path's `write_rhs` closure, where the new
643/// `PdFullSpaceSolver::solve_many_cached` API hands back an
644/// `IteratesVectorMut` (Box-backed blocks).
645fn write_rhs_box(b: &mut Box<dyn pounce_linalg::vector::Vector>, slice: &[Number]) -> bool {
646 if slice.is_empty() {
647 return true;
648 }
649 let Some(dv) = b.as_any_mut().downcast_mut::<DenseVector>() else {
650 return false;
651 };
652 dv.set_values(slice);
653 true
654}
655
656/// Write `slice` into the `DenseVector` behind `rc` in place. Returns
657/// `false` if the Rc is unexpectedly shared (would indicate a bug in
658/// `PdFullSpaceSolver::solve`'s borrow discipline — it should never
659/// `Rc::clone` from the rhs vector) or if the block is not a
660/// `DenseVector`.
661fn write_rhs_block(rc: &mut Rc<dyn pounce_linalg::vector::Vector>, slice: &[Number]) -> bool {
662 if slice.is_empty() {
663 return true;
664 }
665 let Some(v) = Rc::get_mut(rc) else {
666 return false;
667 };
668 let Some(dv) = v.as_any_mut().downcast_mut::<DenseVector>() else {
669 return false;
670 };
671 dv.set_values(slice);
672 true
673}
674
675/// Read the `DenseVector` behind `blk` into `dst`. Handles the
676/// homogeneous case (empty z/v blocks for a TNLP with no bounds) by
677/// broadcasting the scalar rather than calling `expanded_values()`,
678/// which would allocate a fresh `Vec<Number>` every call.
679fn read_res_block(blk: &dyn pounce_linalg::vector::Vector, dst: &mut [Number]) -> bool {
680 if dst.is_empty() {
681 return true;
682 }
683 let Some(dv) = blk.as_any().downcast_ref::<DenseVector>() else {
684 return false;
685 };
686 if dv.is_homogeneous() {
687 let s = dv.scalar();
688 for x in dst.iter_mut() {
689 *x = s;
690 }
691 } else {
692 dst.copy_from_slice(dv.values());
693 }
694 true
695}
696
697impl PdSensBacksolver {
698 /// Single-RHS back-solve against the held factor in the solver's
699 /// internal **scaled** space (no natural-units conjugation). This
700 /// is the value [`SensBacksolver::solve`] returned before
701 /// pounce#128; kept for callers that want the raw factor.
702 pub fn solve_scaled_space(&self, rhs: &[Number], lhs: &mut [Number]) -> bool {
703 let total = self.dim();
704 if rhs.len() != total || lhs.len() != total {
705 return false;
706 }
707 // Pack rhs into block form.
708 let rhs_mut = match self.pack(rhs) {
709 Ok(v) => v,
710 Err(()) => return false,
711 };
712 let rhs_iv = rhs_mut.freeze();
713 // Fresh result slot, zeroed.
714 let mut res_iv = self.template.make_new_zeroed();
715
716 // K · lhs = rhs ⇒ solve(α=1, β=0, rhs, res) writes
717 // res = K⁻¹ · rhs.
718 //
719 // `allow_inexact=true` mirrors upstream sIPOPT's
720 // `SensSimpleBacksolver`: skip `PdFullSpaceSolver`'s iterative-
721 // refinement loop and accept the first back-solve against the
722 // held factor. The IPM-level refinement (`min_refinement_steps
723 // = 1`, residual_ratio_max = 1e-10`) is there to clean up
724 // numerical noise during forward IPM steps; for the held-factor
725 // back-solve used by sens / JaxProblem bwd, it ~doubles the
726 // per-call cost and produces gains that are below `tol`. Under
727 // `jax.jacrev` over a JaxProblem solve this dominates the wall
728 // time at moderate `n+m` (pounce#77 follow-up).
729 let ok = {
730 let mut pd_ref = self.pd.borrow_mut();
731 pd_ref.solve(
732 &self.data,
733 &self.cq,
734 &self.nlp,
735 1.0,
736 0.0,
737 &rhs_iv,
738 &mut res_iv,
739 /* allow_inexact = */ true,
740 /* improve_solution = */ false,
741 )
742 };
743 if !ok {
744 return false;
745 }
746 self.unpack(&res_iv, lhs).is_ok()
747 }
748}
749
750impl SensBacksolver for PdSensBacksolver {
751 fn dim(&self) -> usize {
752 self.dims.iter().sum()
753 }
754
755 /// Solve `K · lhs = rhs` against the converged factor, in
756 /// **natural (unscaled) units** (pounce#128): when the NLP carries
757 /// active scaling (`nlp_scaling_method`, `obj_scaling_factor`,
758 /// user scaling) the RHS is pre-multiplied by `E` and the result
759 /// post-multiplied by `F` (see the `conj` field doc), so
760 /// `lhs = K_natural⁻¹ rhs` for **all eight blocks** — including
761 /// the z/v bound-multiplier rows. Use
762 /// [`Self::solve_scaled_space`] for the raw factor.
763 fn solve(&self, rhs: &[Number], lhs: &mut [Number]) -> bool {
764 match &self.conj {
765 None => self.solve_scaled_space(rhs, lhs),
766 Some(c) => {
767 let total = self.dim();
768 if rhs.len() != total || lhs.len() != total {
769 return false;
770 }
771 let rhs_scaled: Vec<Number> =
772 rhs.iter().zip(c.e.iter()).map(|(&r, &ei)| r * ei).collect();
773 if !self.solve_scaled_space(&rhs_scaled, lhs) {
774 return false;
775 }
776 for (l, &fi) in lhs.iter_mut().zip(c.f.iter()) {
777 *l *= fi;
778 }
779 true
780 }
781 }
782 }
783}