Skip to main content

gam_solve/
active_set.rs

1use crate::estimate::EstimationError;
2use gam_linalg::faer_ndarray::{FaerArrayView, FaerLinalgError, FaerSvd, array1_to_col_matmut};
3use gam_linalg::utils::{StableSolver, array_is_finite, boundary_hit_step_fraction};
4use faer::linalg::solvers::{Lblt as FaerLblt, Solve as FaerSolve};
5use faer::{Side, Unbind};
6use gam_problem::LinearInequalityConstraints;
7use ndarray::{Array1, Array2, s};
8use serde::{Deserialize, Serialize};
9use std::cell::Cell;
10use std::collections::HashSet;
11
12/// Primal-feasibility tolerance the inequality-constrained active-set Newton
13/// solver guarantees on its returned iterate, measured in the *scaled*
14/// constraint-row coordinate system in which `A * beta >= b` is expressed.
15///
16/// The solver accepts a step when the worst scaled violation
17/// `max_i (b_i - a_i^T beta)` is below this threshold (see the acceptance
18/// gate in [`solve_linear_constrained_newton_step`] and the KKT diagnostics
19/// in [`compute_constraint_kkt_diagnostics`]). Any consumer that re-derives a
20/// raw (un-scaled) feasibility tolerance from a returned iterate must scale
21/// this value by the per-row normalization that the constraint builder
22/// applied; demanding tighter feasibility than this is inconsistent with the
23/// solver contract and will spuriously reject valid boundary solutions.
24pub const ACTIVE_SET_PRIMAL_FEASIBILITY_TOL: f64 = 1e-8;
25
26/// Stationarity tolerance for the strong-KKT acceptance gate: the projected
27/// (working-set) gradient residual ‖∇L − Aᵀλ‖∞, either absolute or relative to
28/// `max(1, ‖∇L‖∞)`, must fall below this to certify a constrained stationary
29/// point. Matched against `ACTIVE_SET_KKT_COMPLEMENTARITY_TOL` so both KKT
30/// residual channels are certified at compatible scales.
31const ACTIVE_SET_KKT_STATIONARITY_TOL: f64 = 2e-6;
32
33/// Complementarity-slackness tolerance for the KKT acceptance gate:
34/// `max_i |λ_i · slack_i|` must fall below this for the
35/// active-inactive partition to be consistent.
36const ACTIVE_SET_KKT_COMPLEMENTARITY_TOL: f64 = 1e-6;
37
38/// Dual-feasibility tolerance for the KKT acceptance gate: every working-set
39/// multiplier must satisfy `λ_i ≥ −ACTIVE_SET_KKT_DUAL_FEASIBILITY_TOL` (a
40/// strictly-negative multiplier means the constraint should be released).
41const ACTIVE_SET_KKT_DUAL_FEASIBILITY_TOL: f64 = 1e-8;
42
43/// Relaxed stationarity tolerance accepted only on a *genuinely degenerate
44/// boundary face* — one whose active rows are linearly dependent
45/// (`rank(A_active) < n_active`), so the active-row multipliers are non-unique
46/// and the exact projected gradient cannot reach
47/// `ACTIVE_SET_KKT_STATIONARITY_TOL`. Still requires primal feasibility,
48/// complementarity, and a relative-stationarity backstop.
49///
50/// Public so the outer REML / PIRLS validation gate can apply the same
51/// relaxation when the diagnostic reports a rank-deficient active face — a
52/// strict 5e-6 check there would otherwise refuse iterates that the inner
53/// active-set solver legitimately certified via its own `degenerate_boundary_ok`
54/// clause.
55///
56/// NOTE: this is *not* the mechanism that fixes the `shape=concave` /
57/// `shape=convex` cold-vs-warm cache divergence (#873). The B-spline shape path
58/// reparameterizes curvature into independent *coordinate lower bounds*
59/// `γ_j ≥ 0` (see `shape_lower_bounds_local`); any subset of those active rows
60/// is full rank, so `working_set_rank_deficient` stays `false` and this
61/// relaxation never fires for them — and must not be widened to. That bug is a
62/// *seed* problem (a cold seed landing on the cone vertex with every curvature
63/// row tight); it is fixed at the source by
64/// `project_point_strictly_into_feasible_cone`, which starts the inner solve
65/// strictly inside the cone so the strict tolerance is reachable.
66pub(crate) const ACTIVE_SET_KKT_DEGENERATE_STATIONARITY_TOL: f64 = 1e-3;
67
68/// Relative scale on the predicted-decrease test `predicted_delta ≤
69/// −ε·(1 + ‖∇L‖∞·‖d‖∞)`: when the working-set Newton step still buys a
70/// quadratic-model decrease at this relative margin the step is a usable
71/// descent direction even if the KKT residual has not yet tightened.
72const ACTIVE_SET_MODEL_DESCENT_REL_TOL: f64 = 1e-10;
73
74/// KKT diagnostics for inequality-constrained Newton subproblems.
75///
76/// Constraints are represented as `A * beta >= b` in the same coefficient
77/// coordinate system as the returned `beta`.
78///
79/// **Invariants** (held by all producers; not enforced at consumer boundary):
80/// - `n_active <= n_constraints` (a row cannot be active twice).
81/// - All four residual components (`primal_feasibility`, `dual_feasibility`,
82///   `complementarity`, `stationarity`) are `>= 0.0` and finite.
83/// - `active_tolerance >= 0.0` and finite.
84#[derive(Clone, Debug, Serialize, Deserialize)]
85pub struct ConstraintKktDiagnostics {
86    /// Number of inequality rows.
87    pub n_constraints: usize,
88    /// Number of rows considered active (`slack <= active_tolerance`).
89    pub n_active: usize,
90    /// Maximum primal feasibility violation: `max_i max(0, b_i - a_i^T beta)`.
91    pub primal_feasibility: f64,
92    /// Maximum dual feasibility violation: `max_i max(0, -lambda_i)`.
93    pub dual_feasibility: f64,
94    /// Maximum complementarity residual: `max_i |lambda_i * slack_i|`.
95    pub complementarity: f64,
96    /// Stationarity residual: `||grad - A^T lambda||_inf`.
97    pub stationarity: f64,
98    /// Tolerance used to classify active constraints from slacks.
99    pub active_tolerance: f64,
100    /// `true` when the active rows are linearly dependent (`rank(A_active) <
101    /// n_active`) — a *degenerate boundary face*. On such a face the active-row
102    /// multipliers are non-unique and the strict stationarity tolerance is
103    /// unreachable by construction. The inner active-set solver certifies these
104    /// iterates via its `ACTIVE_SET_KKT_DEGENERATE_STATIONARITY_TOL` relaxation;
105    /// the outer validation gate must consult this flag to apply the matching
106    /// relaxation, or it will refuse a legitimately-converged constrained
107    /// optimum and abort the REML startup loop.
108    ///
109    /// NOTE: B-spline `shape=concave`/`shape=convex` faces are *not* degenerate
110    /// — that path reparameterizes curvature into independent coordinate lower
111    /// bounds `γ_j ≥ 0` (full-rank active subsets), so this flag stays `false`
112    /// for them. Their cold-start fragility is a seed problem fixed by the
113    /// strictly-interior seed, not by this relaxation.
114    #[serde(default)]
115    pub working_set_rank_deficient: bool,
116    /// Inf-norm of the (raw, unprojected) gradient at `beta`, `‖gradient‖∞` —
117    /// the natural scale of the stationarity residual. A converged constrained
118    /// optimum drives `stationarity = ‖grad − Aᵀλ‖∞` to zero *relative to* this
119    /// scale, not to a fixed absolute floor: the profiled REML latent objective
120    /// carries an O(n) gradient magnitude even at a genuine stationary point
121    /// (issue #879), so a bare absolute stationarity gate is unreachable there
122    /// by construction. The inner active-set solver already certifies
123    /// convergence on the scale-invariant ratio
124    /// `stationarity / max(gradient_scale, 1)` (its `stationarity_rel` path
125    /// against `ACTIVE_SET_KKT_STATIONARITY_TOL`); the outer validation gate
126    /// [`crate::estimate::reml::outer_eval`]`::enforce_constraint_kkt` consults this
127    /// field to apply the identical relative test, so the two stop on the same
128    /// contract instead of the gate spuriously aborting a constrained optimum
129    /// the solver legitimately reached (issue #989). Defaults to `0.0` when
130    /// deserialized from a model saved before this field existed, which makes
131    /// `max(gradient_scale, 1) = 1` and recovers the bare absolute test.
132    #[serde(default)]
133    pub gradient_scale: f64,
134}
135
136/// Inf-norm `‖g‖∞` used as the scale of the stationarity residual in the
137/// relative KKT criterion shared by the inner active-set solver and the outer
138/// validation gate (see [`ConstraintKktDiagnostics::gradient_scale`]).
139fn gradient_inf_norm(gradient: &Array1<f64>) -> f64 {
140    gradient.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()))
141}
142
143fn solve_newton_direction_dense(
144    hessian: &Array2<f64>,
145    gradient: &Array1<f64>,
146    direction_out: &mut Array1<f64>,
147) -> Result<(), EstimationError> {
148    if direction_out.len() != gradient.len() {
149        *direction_out = Array1::zeros(gradient.len());
150    }
151
152    let factor = StableSolver::new("active-set newton direction")
153        .factorize(hessian)
154        .map_err(EstimationError::LinearSystemSolveFailed)?;
155    direction_out.assign(gradient);
156    let mut rhsview = array1_to_col_matmut(direction_out);
157    factor.solve_in_place(rhsview.as_mut());
158    direction_out.mapv_inplace(|v| -v);
159    if array_is_finite(direction_out) {
160        return Ok(());
161    }
162    Err(EstimationError::LinearSystemSolveFailed(
163        FaerLinalgError::FactorizationFailed {
164            context: "active-set newton direction non-finite solve",
165        },
166    ))
167}
168
169fn solve_dense_system_via_pseudoinverse(
170    matrix: &Array2<f64>,
171    rhs: &Array1<f64>,
172    out: &mut Array1<f64>,
173) -> Result<(), EstimationError> {
174    if matrix.nrows() != matrix.ncols() || rhs.len() != matrix.nrows() {
175        crate::bail_invalid_estim!("dense pseudoinverse solve dimension mismatch");
176    }
177
178    let (u_opt, singular, vt_opt) = matrix.svd(true, true).map_err(|_| {
179        EstimationError::InvalidInput("dense pseudoinverse solve SVD failed".to_string())
180    })?;
181    let (Some(u), Some(vt)) = (u_opt, vt_opt) else {
182        crate::bail_invalid_estim!("dense pseudoinverse solve missing singular vectors");
183    };
184
185    let max_singular = singular.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()));
186    let tol = 100.0
187        * f64::EPSILON
188        * (matrix.nrows().max(matrix.ncols()).max(1) as f64)
189        * max_singular.max(1.0);
190    let mut coeff = u.t().dot(rhs);
191    for (idx, value) in coeff.iter_mut().enumerate() {
192        let sigma = singular[idx];
193        if sigma.abs() > tol {
194            *value /= sigma;
195        } else {
196            *value = 0.0;
197        }
198    }
199    let solution = vt.t().dot(&coeff);
200    if !array_is_finite(&solution) {
201        crate::bail_invalid_estim!("dense pseudoinverse solve produced non-finite values");
202    }
203    if out.len() != solution.len() {
204        *out = Array1::zeros(solution.len());
205    }
206    out.assign(&solution);
207    Ok(())
208}
209
210pub(crate) fn compute_constraint_kkt_diagnostics(
211    beta: &Array1<f64>,
212    gradient: &Array1<f64>,
213    constraints: &LinearInequalityConstraints,
214) -> ConstraintKktDiagnostics {
215    let m = constraints.a.nrows();
216    let active_tolerance = ACTIVE_SET_PRIMAL_FEASIBILITY_TOL;
217
218    // Measure feasibility in the *scaled* (geometric) coordinate system the
219    // solver's tolerance is expressed in: normalize each inequality
220    // `a_i·β ≥ b_i` by ‖a_i‖ so its slack becomes the signed Euclidean
221    // distance from β to the constraint hyperplane. Without this, a row with a
222    // large norm — e.g. a B-spline endpoint *derivative* clamp, whose rows
223    // carry ‖a_i‖ ≫ 1 — reports a raw slack inflated by ‖a_i‖, so an iterate
224    // that is feasible to the solver's scaled `ACTIVE_SET_PRIMAL_FEASIBILITY_TOL`
225    // guarantee can still exceed a raw primal gate downstream and be spuriously
226    // refused. Per-row normalization makes the diagnostic scale-invariant and
227    // consistent with that contract. Dual/complementarity/stationarity are
228    // invariant under this positive per-row rescaling (with λ̂_i = ‖a_i‖·λ_i:
229    // Âᵀλ̂ = Aᵀλ and λ̂_i·ŝ_i = λ_i·s_i), so only primal feasibility and the
230    // active-set threshold change meaning — both toward the geometric distance
231    // the tolerance is meant to bound.
232    let p = constraints.a.ncols();
233    let mut a_scaled = constraints.a.clone();
234    let mut b_scaled = constraints.b.clone();
235    for i in 0..m {
236        let n_i = constraints.a.row(i).dot(&constraints.a.row(i)).sqrt();
237        if n_i > 0.0 {
238            let inv = 1.0 / n_i;
239            a_scaled.row_mut(i).mapv_inplace(|v| v * inv);
240            b_scaled[i] *= inv;
241        }
242    }
243
244    let mut slack = Array1::<f64>::zeros(m);
245    let mut primal_feasibility: f64 = 0.0;
246    for i in 0..m {
247        let s_i = a_scaled.row(i).dot(beta) - b_scaled[i];
248        slack[i] = s_i;
249        primal_feasibility = primal_feasibility.max((-s_i).max(0.0));
250    }
251
252    let active_idx: Vec<usize> = (0..m).filter(|&i| slack[i] <= active_tolerance).collect();
253    let mut lambda = Array1::<f64>::zeros(m);
254    let mut working_set_rank_deficient = false;
255    if !active_idx.is_empty() {
256        let n_active = active_idx.len();
257        let mut a_active = Array2::<f64>::zeros((n_active, p));
258        for (r, &idx) in active_idx.iter().enumerate() {
259            a_active.row_mut(r).assign(&a_scaled.row(idx));
260        }
261        if let Some((_, lambda_active)) =
262            project_stationarity_residual_on_constraint_cone(gradient, &a_active)
263        {
264            for (r, &idx) in active_idx.iter().enumerate() {
265                lambda[idx] = lambda_active[r];
266            }
267        }
268        // Rank-deficiency detection on the (scaled) active rows. Per-row
269        // positive scaling is rank-preserving, so this answers the same
270        // question the inner solver's `CompressedActiveWorkingSet::
271        // is_degenerate_face` does — `rank(A_active) < n_active`. For curvature
272        // constraints the second-difference operator forces dependence
273        // whenever more than `p` rows bind, and for monotonicity the
274        // first-difference operator does so beyond a similar count. The
275        // diagnostic exposes the flag so the outer validation gate can apply
276        // the same `ACTIVE_SET_KKT_DEGENERATE_STATIONARITY_TOL` relaxation
277        // the inner solver does, instead of refusing the iterate at strict
278        // `ACTIVE_SET_KKT_STATIONARITY_TOL`.
279        working_set_rank_deficient = if n_active > p {
280            true
281        } else if n_active > 1 {
282            let groups: Vec<Vec<usize>> = (0..n_active).map(|i| vec![i]).collect();
283            let b_dummy = Array1::<f64>::zeros(n_active);
284            let (reduced_a, _, _, _) =
285                rank_reduce_rows_pivoted_qr_with_dependence(a_active, b_dummy, groups);
286            reduced_a.nrows() < n_active
287        } else {
288            false
289        };
290    }
291
292    let mut dual_feasibility: f64 = 0.0;
293    let mut complementarity: f64 = 0.0;
294    for i in 0..m {
295        dual_feasibility = dual_feasibility.max((-lambda[i]).max(0.0));
296        complementarity = complementarity.max((lambda[i] * slack[i]).abs());
297    }
298    let stationarity = {
299        let mut resid = gradient.to_owned();
300        resid -= &a_scaled.t().dot(&lambda);
301        resid.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()))
302    };
303
304    ConstraintKktDiagnostics {
305        n_constraints: m,
306        n_active: active_idx.len(),
307        primal_feasibility,
308        dual_feasibility,
309        complementarity,
310        stationarity,
311        active_tolerance,
312        working_set_rank_deficient,
313        gradient_scale: gradient_inf_norm(gradient),
314    }
315}
316
317pub fn project_stationarity_residual_on_constraint_cone(
318    residual: &Array1<f64>,
319    active_a: &Array2<f64>,
320) -> Option<(Array1<f64>, Array1<f64>)> {
321    let p = residual.len();
322    if active_a.ncols() != p {
323        return None;
324    }
325    if active_a.nrows() == 0 {
326        return Some((residual.clone(), Array1::zeros(0)));
327    }
328
329    let m = active_a.nrows();
330    let residual_scale = residual
331        .iter()
332        .fold(0.0_f64, |acc, &v| acc.max(v.abs()))
333        .max(1.0);
334    let row_scale = active_a
335        .iter()
336        .fold(0.0_f64, |acc, &v| acc.max(v.abs()))
337        .max(1.0);
338    let tol = 100.0 * f64::EPSILON * (p.max(m).max(1) as f64) * residual_scale * row_scale;
339
340    let mut lambda = Array1::<f64>::zeros(m);
341    let mut passive = vec![false; m];
342    let mut projected = residual.clone();
343    let max_iter = (3 * m * m).max(10);
344
345    for _ in 0..max_iter {
346        let dual = active_a.dot(&projected);
347        let entering = (0..m)
348            .filter(|&idx| !passive[idx] && dual[idx] > tol)
349            .max_by(|&left, &right| {
350                dual[left]
351                    .partial_cmp(&dual[right])
352                    .unwrap_or(std::cmp::Ordering::Equal)
353            });
354        let Some(entering) = entering else {
355            lambda.mapv_inplace(|v| if v > tol { v } else { 0.0 });
356            projected = residual - &active_a.t().dot(&lambda);
357            if !array_is_finite(&projected) || !array_is_finite(&lambda) {
358                return None;
359            }
360            return Some((projected, lambda));
361        };
362        passive[entering] = true;
363
364        loop {
365            let passive_rows: Vec<usize> = (0..m).filter(|&idx| passive[idx]).collect();
366            if passive_rows.is_empty() {
367                lambda.fill(0.0);
368                projected.assign(residual);
369                break;
370            }
371
372            let mut a_passive = Array2::<f64>::zeros((passive_rows.len(), p));
373            for (pos, &row) in passive_rows.iter().enumerate() {
374                a_passive.row_mut(pos).assign(&active_a.row(row));
375            }
376            let gram = a_passive.dot(&a_passive.t());
377            let rhs = a_passive.dot(residual);
378            let mut lambda_passive = Array1::<f64>::zeros(passive_rows.len());
379            solve_dense_system_via_pseudoinverse(&gram, &rhs, &mut lambda_passive).ok()?;
380            if !array_is_finite(&lambda_passive) {
381                return None;
382            }
383
384            let all_positive = lambda_passive.iter().all(|&v| v > tol);
385            if all_positive {
386                lambda.fill(0.0);
387                for (pos, &row) in passive_rows.iter().enumerate() {
388                    lambda[row] = lambda_passive[pos];
389                }
390                projected = residual - &active_a.t().dot(&lambda);
391                break;
392            }
393
394            let mut alpha = f64::INFINITY;
395            for (pos, &row) in passive_rows.iter().enumerate() {
396                let candidate = lambda_passive[pos];
397                if candidate <= tol {
398                    let current = lambda[row];
399                    let denom = current - candidate;
400                    if denom > 0.0 {
401                        alpha = alpha.min(current / denom);
402                    }
403                }
404            }
405            if !alpha.is_finite() {
406                alpha = 0.0;
407            }
408            alpha = alpha.clamp(0.0, 1.0);
409
410            for (pos, &row) in passive_rows.iter().enumerate() {
411                lambda[row] += alpha * (lambda_passive[pos] - lambda[row]);
412                if lambda[row] <= tol {
413                    lambda[row] = 0.0;
414                    passive[row] = false;
415                }
416            }
417            if passive.iter().all(|&is_passive| !is_passive) {
418                projected.assign(residual);
419                break;
420            }
421        }
422    }
423
424    // Exhausting the Lawson-Hanson iterations means we do not have the
425    // normal-cone projection required for a KKT certificate.
426    None
427}
428
429pub(crate) fn feasible_point_for_linear_constraints(
430    constraints: &LinearInequalityConstraints,
431    p: usize,
432) -> Option<Array1<f64>> {
433    if constraints.a.ncols() != p
434        || constraints.a.nrows() == 0
435        || constraints.b.len() != constraints.a.nrows()
436    {
437        return None;
438    }
439    if constraints.b.iter().all(|v| v.abs() <= 1e-14) {
440        return Some(Array1::zeros(p));
441    }
442
443    let gram = constraints.a.dot(&constraints.a.t());
444    let (u_opt, singular, vt_opt) = gram.svd(true, true).ok()?;
445    let (Some(u), Some(vt)) = (u_opt, vt_opt) else {
446        return None;
447    };
448    let max_singular = singular.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()));
449    let tol = 100.0 * f64::EPSILON * constraints.a.nrows().max(1) as f64 * max_singular.max(1.0);
450    let mut coeff = u.t().dot(&constraints.b);
451    for (idx, value) in coeff.iter_mut().enumerate() {
452        let sigma = singular[idx];
453        if sigma.abs() > tol {
454            *value /= sigma;
455        } else {
456            *value = 0.0;
457        }
458    }
459    let dual = vt.t().dot(&coeff);
460    let beta = constraints.a.t().dot(&dual);
461    if beta.len() != p || beta.iter().any(|v| !v.is_finite()) {
462        return None;
463    }
464    let slack = constraints.a.dot(&beta) - &constraints.b;
465    if slack.iter().all(|v| *v >= -1e-8) {
466        Some(beta)
467    } else {
468        None
469    }
470}
471
472/// Strictly-interior margin (in per-row geometric / scaled-slack units) required
473/// of the projected cold-start seed produced by
474/// [`project_point_strictly_into_feasible_cone`]. Each constraint row is shifted
475/// to `a_iᵀβ ≥ b_i + ACTIVE_SET_INTERIOR_SEED_MARGIN·‖a_i‖` so that, scaled by
476/// `‖a_i‖`, every row of the returned seed has slack `≥` this margin. The value
477/// is far above the active-set activation threshold (`tol_active = 1e-10`) so the
478/// initial working set the QP step solver builds from the seed is **empty** — no
479/// row is mistaken for "on the boundary" — yet small enough that the seed stays a
480/// negligible distance from the data-driven projection it is derived from.
481const ACTIVE_SET_INTERIOR_SEED_MARGIN: f64 = 1e-6;
482
483/// The strictly-interior cold-start margin (scaled-slack units) that
484/// [`project_point_strictly_into_feasible_cone`] guarantees on its returned
485/// seed. Exposed so the P-IRLS seed builder can decide, on the same scale,
486/// whether the current seed is already strictly interior (and may be used as-is)
487/// or sits on / outside the cone boundary (and must be projected).
488#[inline]
489pub(crate) fn interior_seed_margin() -> f64 {
490    ACTIVE_SET_INTERIOR_SEED_MARGIN
491}
492
493/// Maximum nesting depth of the strictly-interior feasibility repair before the
494/// solver stops re-projecting and surfaces an honest constraint-violation error.
495///
496/// [`project_point_strictly_into_feasible_cone`] and
497/// [`solve_quadratic_with_linear_constraints`] are mutually recursive: the
498/// quadratic solve's final feasibility contract (#1108) projects an infeasible
499/// iterate back onto the cone, and that projection itself solves an inner
500/// identity-Hessian QP whose *own* feasibility contract can project again. On a
501/// well-conditioned cone the repair converges at depth 0–1 — each level shifts
502/// every one-sided row strictly further inward. But on near-anti-parallel rows
503/// (the clamped / anchored monotone time-warp constraints an interval-censored
504/// survival fit emits, which are only *near* — not exactly — anti-parallel and so
505/// slip past the zero-width equality lift below), the inward-shifted QP can keep
506/// returning an infeasible candidate, so the `solve ↔ project` recursion never
507/// bottoms out and exhausts the worker stack. A cone that cannot be certified
508/// feasible within this many successive inward shifts is degenerate; the
509/// projection then returns `None`, which the quadratic solve reports as
510/// [`EstimationError::ParameterConstraintViolation`] rather than recursing.
511const MAX_FEASIBILITY_REPAIR_DEPTH: u32 = 16;
512
513thread_local! {
514    /// Current nesting depth of the `solve ↔ project` feasibility-repair cycle on
515    /// this thread. Every recursion path (the quadratic solve's repair step and
516    /// the projected-gradient fallback alike) routes back through
517    /// [`project_point_strictly_into_feasible_cone`], so bounding its re-entrancy
518    /// bounds the whole cycle. Per-thread because each solve runs to completion on
519    /// a single call stack; independent solves on other worker threads carry their
520    /// own counter.
521    static FEASIBILITY_REPAIR_DEPTH: Cell<u32> = const { Cell::new(0) };
522}
523
524/// RAII depth counter for the feasibility-repair recursion. [`enter`] increments
525/// the per-thread depth and returns a guard whose `Drop` restores it on every
526/// exit path — including the projection's many `return None` branches — so the
527/// counter can never leak. It yields `None` once
528/// [`MAX_FEASIBILITY_REPAIR_DEPTH`] is reached, so the caller bails out of the
529/// recursion instead of descending another level.
530///
531/// [`enter`]: FeasibilityRepairGuard::enter
532struct FeasibilityRepairGuard;
533
534impl FeasibilityRepairGuard {
535    fn enter() -> Option<Self> {
536        FEASIBILITY_REPAIR_DEPTH.with(|depth| {
537            let current = depth.get();
538            if current >= MAX_FEASIBILITY_REPAIR_DEPTH {
539                None
540            } else {
541                depth.set(current + 1);
542                Some(Self)
543            }
544        })
545    }
546}
547
548impl Drop for FeasibilityRepairGuard {
549    fn drop(&mut self) {
550        FEASIBILITY_REPAIR_DEPTH.with(|depth| depth.set(depth.get().saturating_sub(1)));
551    }
552}
553
554/// Project `point` to a *strictly interior* feasible point of the polyhedron
555/// `{β : A·β ≥ b}`: the solution of `min_β ½‖β − point‖²` subject to the
556/// margin-shifted system `A·β ≥ b + δ·‖a_i‖`, with `δ =
557/// ACTIVE_SET_INTERIOR_SEED_MARGIN`.
558///
559/// This is the principled feasible cold-start seed for a shape-constrained
560/// (convex / concave / monotone) smooth. It is qualitatively different from
561/// [`feasible_point_for_linear_constraints`], which returns the *minimum-norm*
562/// feasible point — for a homogeneous cone (`b = 0`, as the second-difference
563/// convexity / concavity constraints are) that minimum-norm point is the cone
564/// **vertex** `β = 0` (a flat line) where every constraint row is tight. A
565/// shape-constrained P-IRLS launched from that vertex hands the inner active-set
566/// QP an all-rows-active working set (every row's slack is `0`), and the QP then
567/// stalls on a degenerate, non-stationary face of the cone. The fit's success
568/// then depends on whether a warm-start seed happens to drop it into the right
569/// basin, so the same fit silently diverges (or aborts) between a cold and a
570/// warm cache (#873).
571///
572/// Requiring a strictly-positive margin on every row makes the returned seed an
573/// interior point: the QP step solver starts from an **empty** active set and
574/// adds only the genuinely binding rows, converging to the certified constrained
575/// stationary point regardless of cache state. The projection is the
576/// identity-Hessian instance of [`solve_quadratic_with_linear_constraints`]
577/// (`H = I`, `rhs = point` ⇒ minimizing `½‖β − point‖²`), so the interior seed is
578/// also the *nearest* strictly-interior point to the supplied data-driven
579/// `point` — it inherits whatever curvature `point` already carries. Returns
580/// `None` if the constraints are malformed or the active-set QP cannot certify a
581/// feasible solution, so callers can fall back.
582pub fn project_point_strictly_into_feasible_cone(
583    point: &Array1<f64>,
584    constraints: &LinearInequalityConstraints,
585) -> Option<Array1<f64>> {
586    // Bound the mutually-recursive `solve ↔ project` feasibility repair. Every
587    // recursion path re-enters here, so a too-deep call returns `None` (a
588    // degenerate cone the strictly-interior QP cannot certify) instead of
589    // recursing until the worker stack overflows. The guard restores the
590    // per-thread depth on every early return via its `Drop`.
591    let repair_guard = FeasibilityRepairGuard::enter()?;
592    let p = point.len();
593    let m = constraints.a.nrows();
594    if constraints.a.ncols() != p || m == 0 || constraints.b.len() != m {
595        return None;
596    }
597    let norms: Vec<f64> = (0..m)
598        .map(|i| constraints.a.row(i).dot(&constraints.a.row(i)).sqrt())
599        .collect();
600
601    // Classify rows. An *anti-parallel pair* with ~zero scaled feasible-slab
602    // width is an EQUALITY `rᵀβ = t` encoded as `{rᵀβ ≥ t, −rᵀβ ≥ −t}` (the
603    // canonical encoding emitted by a clamped / anchored boundary condition).
604    // Representing an equality as two opposing inequalities makes the inequality
605    // active-set QP CYCLE: it adds one side, the equality-split multiplier turns
606    // the other negative, it removes it, and the working set repeats until cycle
607    // detection aborts the solve — so the projection would fail and the caller
608    // would fall back to the cone vertex, silently reintroducing the #873 seed
609    // for the *combined* case (`shape=concave`/`convex` with `bc=clamped`). So we
610    // lift such pairs out as genuine equalities, eliminate them through the null
611    // space, and run the strictly-interior QP only on the one-sided rows. A pure
612    // shape cone has no anti-parallel rows, so `equality_rows` is empty and this
613    // reduces to the original single-QP path verbatim.
614    const ANTIPARALLEL_COS_TOL: f64 = -1.0 + 1e-9;
615    const EQUALITY_WIDTH_TOL: f64 = 1e-9;
616    let mut is_equality_member = vec![false; m];
617    let mut equality_rows: Vec<usize> = Vec::new();
618    let mut margin = vec![ACTIVE_SET_INTERIOR_SEED_MARGIN; m];
619    for i in 0..m {
620        if norms[i] == 0.0 {
621            margin[i] = 0.0;
622            continue;
623        }
624        for j in (i + 1)..m {
625            if norms[j] == 0.0 {
626                continue;
627            }
628            let cos = constraints.a.row(i).dot(&constraints.a.row(j)) / (norms[i] * norms[j]);
629            if cos > ANTIPARALLEL_COS_TOL {
630                continue;
631            }
632            // Anti-parallel rows â and −â: row i is `âᵀβ ≥ b_i/‖a_i‖`, row j is
633            // `âᵀβ ≤ −b_j/‖a_j‖`. Scaled feasible-slab width:
634            let width = -constraints.b[j] / norms[j] - constraints.b[i] / norms[i];
635            if width.abs() <= EQUALITY_WIDTH_TOL {
636                // Zero width ⇒ equality. Record it once (row i's orientation) and
637                // exclude both rows from the one-sided interior shift.
638                if !is_equality_member[i] && !is_equality_member[j] {
639                    equality_rows.push(i);
640                }
641                is_equality_member[i] = true;
642                is_equality_member[j] = true;
643            } else {
644                // Genuine (wide) two-sided bound: cap each side's inward shift at
645                // `w/3` so the shifted slab `s_i + s_j ≤ w` stays non-empty.
646                let cap = (width / 3.0).max(0.0);
647                margin[i] = margin[i].min(cap);
648                margin[j] = margin[j].min(cap);
649            }
650        }
651    }
652
653    // One-sided rows (everything not lifted into an equality), shifted strictly
654    // inward by `margin·‖a‖`.
655    let ineq_rows: Vec<usize> = (0..m).filter(|&i| !is_equality_member[i]).collect();
656    let mut a_ineq = Array2::<f64>::zeros((ineq_rows.len(), p));
657    let mut b_ineq = Array1::<f64>::zeros(ineq_rows.len());
658    for (r, &i) in ineq_rows.iter().enumerate() {
659        a_ineq.row_mut(r).assign(&constraints.a.row(i));
660        b_ineq[r] = constraints.b[i] + margin[i] * norms[i];
661    }
662
663    let beta = if equality_rows.is_empty() {
664        // No equalities: the original single strictly-interior QP
665        // (`min ½‖β − point‖²` s.t. the margin-shifted one-sided rows).
666        let interior = LinearInequalityConstraints::new(a_ineq, b_ineq)
667            .expect("shifted interior constraint shape invariant");
668        let identity = Array2::<f64>::eye(p);
669        solve_quadratic_with_linear_constraints(&identity, point, point, &interior, None)
670            .ok()?
671            .0
672    } else {
673        // Eliminate `E β = e` through its null space. From the thin SVD
674        // `E = U Σ Vᵀ` (rank `r`): the row space is `span(v_0..v_{r-1})`, the
675        // minimum-norm particular solution is `β_p = Σ_{i<r} (uᵢᵀe / σᵢ) vᵢ`, and
676        // an orthonormal null basis `Z` (p × (p−r)) is the complement of the row
677        // space (built by Gram-Schmidt of the standard axes — `p` is a single
678        // smooth-term width, so this is cheap and exact). Writing `β = β_p + Z u`
679        // and using `ZᵀZ = I`, the projection becomes the reduced strictly-
680        // interior QP `min ½‖u − Zᵀ(point − β_p)‖²` s.t. `(A_ineq Z) u ≥ b_ineq −
681        // A_ineq β_p`, whose rows carry no anti-parallel pair, so it can't cycle.
682        let k = equality_rows.len();
683        let mut e_mat = Array2::<f64>::zeros((k, p));
684        let mut e_rhs = Array1::<f64>::zeros(k);
685        for (r, &i) in equality_rows.iter().enumerate() {
686            e_mat.row_mut(r).assign(&constraints.a.row(i));
687            e_rhs[r] = constraints.b[i];
688        }
689        let (u_opt, sing, vt_opt) = e_mat.svd(true, true).ok()?;
690        let (u_mat, vt) = (u_opt?, vt_opt?);
691        let smax = sing.iter().fold(0.0_f64, |acc, &v| acc.max(v));
692        let rank_tol = smax.max(1.0) * (k.max(p) as f64) * f64::EPSILON * 100.0;
693        let rank = sing.iter().filter(|&&s| s > rank_tol).count();
694        if rank == 0 || rank >= p {
695            return None;
696        }
697        let mut beta_p = Array1::<f64>::zeros(p);
698        for idx in 0..rank {
699            let coeff = u_mat.column(idx).dot(&e_rhs) / sing[idx];
700            beta_p.scaled_add(coeff, &vt.row(idx));
701        }
702        // Orthonormal null basis: Gram-Schmidt the standard axes against the row
703        // space `vt[0..rank]` and the null vectors collected so far.
704        let mut basis: Vec<Array1<f64>> = (0..rank).map(|i| vt.row(i).to_owned()).collect();
705        let mut z = Array2::<f64>::zeros((p, p - rank));
706        let mut collected = 0usize;
707        for axis in 0..p {
708            if collected == p - rank {
709                break;
710            }
711            let mut v = Array1::<f64>::zeros(p);
712            v[axis] = 1.0;
713            for q in basis.iter() {
714                let c = q.dot(&v);
715                v.scaled_add(-c, q);
716            }
717            let nrm = v.dot(&v).sqrt();
718            if nrm > 1e-8 {
719                v /= nrm;
720                z.column_mut(collected).assign(&v);
721                basis.push(v);
722                collected += 1;
723            }
724        }
725        if collected != p - rank {
726            return None;
727        }
728        let a_red = a_ineq.dot(&z);
729        let b_red = &b_ineq - &a_ineq.dot(&beta_p);
730        let u0 = z.t().dot(&(point - &beta_p));
731        let reduced = LinearInequalityConstraints::new(a_red, b_red)
732            .expect("reduced constraint shape invariant");
733        let identity = Array2::<f64>::eye(z.ncols());
734        let (u_sol, _active) =
735            solve_quadratic_with_linear_constraints(&identity, &u0, &u0, &reduced, None).ok()?;
736        &beta_p + &z.dot(&u_sol)
737    };
738
739    if beta.len() != p || beta.iter().any(|v| !v.is_finite()) {
740        return None;
741    }
742    // Certify against the ORIGINAL constraints: every genuine one-sided row must
743    // clear (most of) its requested margin so the QP step solver sees no spurious
744    // active rows; equality-pair rows need only be feasible — they are
745    // legitimately tight.
746    const SEED_FEASIBILITY_TOL: f64 = 1e-9;
747    for i in 0..m {
748        let s = scaled_constraint_slack(&beta, constraints, i);
749        let lower = if is_equality_member[i] {
750            -SEED_FEASIBILITY_TOL
751        } else {
752            0.5 * margin[i] - SEED_FEASIBILITY_TOL
753        };
754        if s < lower {
755            return None;
756        }
757    }
758    // All mutually-recursive `solve ↔ project` calls are complete; release the
759    // per-thread recursion-depth guard explicitly on the success path (early
760    // returns above release it via `Drop`). Named + dropped (not `let _guard`)
761    // to satisfy the underscore-binding ban without changing its lifetime.
762    drop(repair_guard);
763    Some(beta)
764}
765
766/// Worst primal-feasibility violation across all rows of `constraints`,
767/// measured in the per-row-scaled (geometric) coordinate system documented on
768/// [`ACTIVE_SET_PRIMAL_FEASIBILITY_TOL`]. A row's slack is divided by ‖a_i‖
769/// so the returned value is the signed Euclidean distance from `beta` to the
770/// constraint hyperplane, not the raw dot-product residual. This matches
771/// [`compute_constraint_kkt_diagnostics`] and keeps the in-solver acceptance
772/// gate, the downstream KKT report, and any post-fit feasibility check on a
773/// single scale-invariant metric. A row with ‖a_i‖ = 38 (a typical B-spline
774/// endpoint-derivative clamp at k = 12) has a 1e-6 raw violation that is only
775/// 2.6e-8 in geometric units — accepting on raw alone is anisotropic in
776/// input-space and breaks the published contract.
777fn max_linear_constraint_violation(
778    beta: &Array1<f64>,
779    constraints: &LinearInequalityConstraints,
780) -> (f64, usize) {
781    let mut worst = 0.0_f64;
782    let mut worst_row = 0usize;
783    for i in 0..constraints.a.nrows() {
784        let norm = constraints.a.row(i).dot(&constraints.a.row(i)).sqrt();
785        let inv = if norm > 0.0 { 1.0 / norm } else { 0.0 };
786        let slack = (constraints.a.row(i).dot(beta) - constraints.b[i]) * inv;
787        let viol = (-slack).max(0.0);
788        if viol > worst {
789            worst = viol;
790            worst_row = i;
791        }
792    }
793    (worst, worst_row)
794}
795
796/// Per-row signed scaled slack: `(a_i·beta - b_i) / ‖a_i‖`. Returns zero for
797/// degenerate rows with `‖a_i‖ = 0` (those rows carry no geometric content).
798/// Used wherever the active-set solver needs to compare per-row feasibility
799/// against a scale-invariant tolerance.
800#[inline]
801fn scaled_constraint_slack(
802    beta: &Array1<f64>,
803    constraints: &LinearInequalityConstraints,
804    i: usize,
805) -> f64 {
806    let norm = constraints.a.row(i).dot(&constraints.a.row(i)).sqrt();
807    let inv = if norm > 0.0 { 1.0 / norm } else { 0.0 };
808    (constraints.a.row(i).dot(beta) - constraints.b[i]) * inv
809}
810
811pub(crate) fn solve_kkt_direction(
812    hessian: &Array2<f64>,
813    gradient: &Array1<f64>,
814    active_a: &Array2<f64>,
815    active_residual: Option<&Array1<f64>>,
816) -> Result<(Array1<f64>, Array1<f64>), EstimationError> {
817    let p = hessian.nrows();
818    let m = active_a.nrows();
819    if hessian.ncols() != p || gradient.len() != p || active_a.ncols() != p {
820        crate::bail_invalid_estim!("KKT solve dimension mismatch");
821    }
822    if let Some(residual) = active_residual
823        && residual.len() != m
824    {
825        crate::bail_invalid_estim!(
826            "KKT active residual length mismatch: got {}, expected {}",
827            residual.len(),
828            m
829        );
830    }
831    if m == 0 {
832        let mut d = Array1::<f64>::zeros(p);
833        solve_newton_direction_dense(hessian, gradient, &mut d)?;
834        return Ok((d, Array1::zeros(0)));
835    }
836    let mut kkt = Array2::<f64>::zeros((p + m, p + m));
837    kkt.slice_mut(s![0..p, 0..p]).assign(hessian);
838    kkt.slice_mut(s![0..p, p..(p + m)]).assign(&active_a.t());
839    kkt.slice_mut(s![p..(p + m), 0..p]).assign(active_a);
840
841    let mut rhs = Array1::<f64>::zeros(p + m);
842    for i in 0..p {
843        rhs[i] = -gradient[i];
844    }
845    if let Some(residual) = active_residual {
846        for i in 0..m {
847            rhs[p + i] = residual[i];
848        }
849    }
850    let rhs_target = rhs.clone();
851
852    let kkt_view = FaerArrayView::new(&kkt);
853    let factor = FaerLblt::new(kkt_view.as_ref(), Side::Lower);
854    let mut rhs_col = array1_to_col_matmut(&mut rhs);
855    factor.solve_in_place(rhs_col.as_mut());
856    if !rhs.iter().all(|v| v.is_finite()) {
857        solve_dense_system_via_pseudoinverse(&kkt, &rhs_target, &mut rhs)?;
858    }
859    let d = rhs.slice(s![0..p]).to_owned();
860    let lambda = rhs.slice(s![p..(p + m)]).to_owned();
861    Ok((d, lambda))
862}
863
864#[derive(Clone, Debug)]
865pub(crate) struct CompressedActiveWorkingSet {
866    pub(crate) constraints: LinearInequalityConstraints,
867    pub(crate) groups: Vec<Vec<usize>>,
868    multiplier_dependence: Vec<Vec<ActiveRowDependence>>,
869    pub(crate) original_active_count: usize,
870}
871
872// Visible at `pub` (with private fields) so it can appear in the signature of
873// the `pub` cross-crate `rank_reduce_rows_pivoted_qr_with_dependence`, which the
874// gam-custom-family rank-reduction tests call directly after the #1521 crate
875// carve. The fields stay private: external callers ignore the dependence return.
876#[derive(Clone, Copy, Debug)]
877pub struct ActiveRowDependence {
878    active_pos: usize,
879    coeff: f64,
880}
881
882impl CompressedActiveWorkingSet {
883    fn is_degenerate_face(&self) -> bool {
884        self.constraints.a.nrows() < self.original_active_count
885            || self.groups.iter().any(|group| group.len() > 1)
886    }
887
888    fn reconstructed_active_multipliers(&self, lambda_system: &Array1<f64>) -> Vec<(usize, f64)> {
889        let mut multipliers = Vec::new();
890        for (group_pos, &lambda_system_value) in lambda_system.iter().enumerate() {
891            let lambda_true = -lambda_system_value;
892            if let Some(dependencies) = self.multiplier_dependence.get(group_pos) {
893                for dependency in dependencies {
894                    if dependency.coeff.abs() > f64::EPSILON {
895                        multipliers.push((dependency.active_pos, lambda_true / dependency.coeff));
896                    }
897                }
898            }
899        }
900        multipliers
901    }
902}
903
904pub(crate) fn compress_active_working_set(
905    x: &Array1<f64>,
906    constraints: &LinearInequalityConstraints,
907    active: &[usize],
908) -> Result<CompressedActiveWorkingSet, EstimationError> {
909    let p = constraints.a.ncols();
910    if x.len() != p {
911        crate::bail_invalid_estim!("active working-set compression dimension mismatch");
912    }
913
914    let mut a_out = Array2::<f64>::zeros((active.len(), p));
915    let mut b_out = Array1::<f64>::zeros(active.len());
916    let mut groups_out: Vec<Vec<usize>> = Vec::with_capacity(active.len());
917    for (pos, &idx) in active.iter().enumerate() {
918        if idx >= constraints.a.nrows() {
919            crate::bail_invalid_estim!(
920                "active working-set index {} out of bounds for {} constraints",
921                idx,
922                constraints.a.nrows()
923            );
924        }
925        a_out.row_mut(pos).assign(&constraints.a.row(idx));
926        b_out[pos] = constraints.b[idx];
927        groups_out.push(vec![pos]);
928    }
929
930    let (a_out, b_out, groups_out, multiplier_dependence) =
931        rank_reduce_rows_pivoted_qr_with_dependence(a_out, b_out, groups_out);
932
933    Ok(CompressedActiveWorkingSet {
934        constraints: LinearInequalityConstraints::new(a_out, b_out)
935            .expect("compressed active constraint shape invariant"),
936        groups: groups_out,
937        multiplier_dependence,
938        original_active_count: active.len(),
939    })
940}
941
942fn identity_multiplier_dependence(groups: &[Vec<usize>]) -> Vec<Vec<ActiveRowDependence>> {
943    groups
944        .iter()
945        .map(|group| {
946            group
947                .iter()
948                .copied()
949                .map(|active_pos| ActiveRowDependence {
950                    active_pos,
951                    coeff: 1.0,
952                })
953                .collect()
954        })
955        .collect()
956}
957
958pub fn rank_reduce_rows_pivoted_qr_with_dependence(
959    a: Array2<f64>,
960    b: Array1<f64>,
961    groups: Vec<Vec<usize>>,
962) -> (
963    Array2<f64>,
964    Array1<f64>,
965    Vec<Vec<usize>>,
966    Vec<Vec<ActiveRowDependence>>,
967) {
968    let k = a.nrows();
969    let p = a.ncols();
970    if k <= 1 {
971        let multiplier_dependence = identity_multiplier_dependence(&groups);
972        return (a, b, groups, multiplier_dependence);
973    }
974
975    let mut at_faer = faer::Mat::<f64>::zeros(p, k);
976    for i in 0..k {
977        for j in 0..p {
978            at_faer[(j, i)] = a[[i, j]];
979        }
980    }
981
982    let qr = at_faer.as_ref().col_piv_qr();
983    let r_mat = qr.thin_R();
984    let diag_len = r_mat.nrows().min(r_mat.ncols());
985    let leading_diag = if diag_len > 0 {
986        r_mat[(0, 0)].abs()
987    } else {
988        0.0
989    };
990
991    const RANK_ALPHA: f64 = 100.0;
992    let tol = RANK_ALPHA * f64::EPSILON * (k.max(p).max(1) as f64) * leading_diag.max(1.0);
993
994    let rank = (0..diag_len).filter(|&i| r_mat[(i, i)].abs() > tol).count();
995    if rank >= k {
996        let multiplier_dependence = identity_multiplier_dependence(&groups);
997        return (a, b, groups, multiplier_dependence);
998    }
999    if rank == 0 {
1000        log::debug!(
1001            "rank-reduced active constraints from {} to 0 rows (all active rows numerically zero)",
1002            k
1003        );
1004        return (
1005            Array2::<f64>::zeros((0, p)),
1006            Array1::<f64>::zeros(0),
1007            Vec::new(),
1008            Vec::new(),
1009        );
1010    }
1011
1012    let (perm_fwd, _) = qr.P().arrays();
1013    let kept_orig: Vec<usize> = (0..rank).map(|j| perm_fwd[j].unbound()).collect();
1014    let dropped_orig: Vec<usize> = (rank..k).map(|j| perm_fwd[j].unbound()).collect();
1015
1016    let mut orig_to_out = std::collections::HashMap::with_capacity(rank);
1017    let mut a_out = Array2::<f64>::zeros((rank, p));
1018    let mut b_out = Array1::<f64>::zeros(rank);
1019    let mut groups_out: Vec<Vec<usize>> = Vec::with_capacity(rank);
1020    let mut multiplier_dependence: Vec<Vec<ActiveRowDependence>> = Vec::with_capacity(rank);
1021    for (out_idx, &orig_idx) in kept_orig.iter().enumerate() {
1022        a_out.row_mut(out_idx).assign(&a.row(orig_idx));
1023        b_out[out_idx] = b[orig_idx];
1024        groups_out.push(groups[orig_idx].clone());
1025        multiplier_dependence.push(
1026            groups[orig_idx]
1027                .iter()
1028                .copied()
1029                .map(|active_pos| ActiveRowDependence {
1030                    active_pos,
1031                    coeff: 1.0,
1032                })
1033                .collect(),
1034        );
1035        orig_to_out.insert(orig_idx, out_idx);
1036    }
1037
1038    for &dropped_idx in &dropped_orig {
1039        let dropped_row = a.row(dropped_idx);
1040        let dropped_norm = dropped_row.dot(&dropped_row).sqrt();
1041        let mut best_positive_align = -1.0_f64;
1042        let mut best_positive_target: Option<(usize, f64)> = None;
1043        let mut best_abs_align = -1.0_f64;
1044        let mut best_signed_target = (kept_orig[0], 1.0_f64);
1045        for &kept_idx in &kept_orig {
1046            let kept_row = a.row(kept_idx);
1047            let kept_norm = kept_row.dot(&kept_row).sqrt();
1048            let dot = kept_row.dot(&dropped_row);
1049            let align = if kept_norm > 0.0 && dropped_norm > 0.0 {
1050                dot / (kept_norm * dropped_norm)
1051            } else {
1052                dot
1053            };
1054            let coeff = if kept_norm > 0.0 {
1055                dot / (kept_norm * kept_norm)
1056            } else {
1057                0.0
1058            };
1059            if align.abs() > best_abs_align {
1060                best_abs_align = align.abs();
1061                best_signed_target = (kept_idx, coeff);
1062            }
1063            if coeff > 0.0 && align > best_positive_align {
1064                best_positive_align = align;
1065                best_positive_target = Some((kept_idx, coeff));
1066            }
1067        }
1068        let (best_target, coeff) = best_positive_target.unwrap_or(best_signed_target);
1069        let &out_idx = orig_to_out
1070            .get(&best_target)
1071            .expect("merge target must be a kept row");
1072        for &active_pos in &groups[dropped_idx] {
1073            multiplier_dependence[out_idx].push(ActiveRowDependence { active_pos, coeff });
1074        }
1075        if coeff > 0.0 {
1076            groups_out[out_idx].extend_from_slice(&groups[dropped_idx]);
1077        }
1078    }
1079
1080    for group in &mut groups_out {
1081        group.sort_unstable();
1082        group.dedup();
1083    }
1084    for dependencies in &mut multiplier_dependence {
1085        dependencies.sort_unstable_by_key(|dependency| dependency.active_pos);
1086        dependencies.dedup_by_key(|dependency| dependency.active_pos);
1087    }
1088
1089    let mut row_order: Vec<usize> = (0..groups_out.len()).collect();
1090    row_order.sort_by_key(|&idx| groups_out[idx].first().copied().unwrap_or(usize::MAX));
1091    if row_order.iter().enumerate().any(|(idx, &orig)| idx != orig) {
1092        let mut a_sorted = Array2::<f64>::zeros((rank, p));
1093        let mut b_sorted = Array1::<f64>::zeros(rank);
1094        let mut groups_sorted = Vec::with_capacity(rank);
1095        let mut dependence_sorted = Vec::with_capacity(rank);
1096        for (out_idx, orig_idx) in row_order.into_iter().enumerate() {
1097            a_sorted.row_mut(out_idx).assign(&a_out.row(orig_idx));
1098            b_sorted[out_idx] = b_out[orig_idx];
1099            groups_sorted.push(groups_out[orig_idx].clone());
1100            dependence_sorted.push(multiplier_dependence[orig_idx].clone());
1101        }
1102        a_out = a_sorted;
1103        b_out = b_sorted;
1104        groups_out = groups_sorted;
1105        multiplier_dependence = dependence_sorted;
1106    }
1107
1108    if rank < k {
1109        log::debug!(
1110            "rank-reduced active constraints from {} to {} rows (rank deficiency {})",
1111            k,
1112            rank,
1113            k - rank
1114        );
1115    }
1116
1117    (a_out, b_out, groups_out, multiplier_dependence)
1118}
1119
1120pub(crate) fn working_set_kkt_diagnostics_from_multipliers(
1121    x: &Array1<f64>,
1122    gradient: &Array1<f64>,
1123    working_constraints: &LinearInequalityConstraints,
1124    lambda_active_true: &Array1<f64>,
1125    n_total_constraints: usize,
1126) -> Result<ConstraintKktDiagnostics, EstimationError> {
1127    let p = working_constraints.a.ncols();
1128    if x.len() != p || gradient.len() != p {
1129        crate::bail_invalid_estim!("working-set KKT diagnostic dimension mismatch");
1130    }
1131    if lambda_active_true.len() != working_constraints.a.nrows() {
1132        crate::bail_invalid_estim!(
1133            "working-set KKT multiplier length mismatch: got {}, expected {}",
1134            lambda_active_true.len(),
1135            working_constraints.a.nrows()
1136        );
1137    }
1138    // Primal feasibility and complementarity are measured in the per-row-scaled
1139    // (geometric) coordinate system the public solver contract is expressed in
1140    // (see [`ACTIVE_SET_PRIMAL_FEASIBILITY_TOL`] and
1141    // [`compute_constraint_kkt_diagnostics`]). Without scaling, a row with
1142    // ‖a_i‖ ≫ 1 — e.g. a B-spline endpoint-derivative clamp — reports a raw
1143    // slack inflated by ‖a_i‖, and the in-solver acceptance gate
1144    // (`worst <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL`) becomes anisotropic across
1145    // rows. Complementarity is the SCALED product `λ̂_i · ŝ_i` with
1146    // `λ̂_i = ‖a_i‖·λ_i`; that's invariant under the same per-row rescaling, so
1147    // its semantics are unchanged while the units match the primal column.
1148    let m = working_constraints.a.nrows();
1149    let mut slack = Array1::<f64>::zeros(m);
1150    let mut primal_feasibility: f64 = 0.0;
1151    for i in 0..m {
1152        let s_i = scaled_constraint_slack(x, working_constraints, i);
1153        slack[i] = s_i;
1154        primal_feasibility = primal_feasibility.max((-s_i).max(0.0));
1155    }
1156
1157    let lambda = lambda_active_true.to_owned();
1158
1159    let mut dual_feasibility: f64 = 0.0;
1160    let mut complementarity: f64 = 0.0;
1161    for i in 0..m {
1162        dual_feasibility = dual_feasibility.max((-lambda[i]).max(0.0));
1163        // Scale-invariant complementarity `λ̂_i · ŝ_i` with `λ̂_i = ‖a_i‖·λ_i`
1164        // and `ŝ_i` the already-scaled slack: this product equals the raw
1165        // `λ_i · (a_iᵀx − b_i)`, invariant under per-row rescaling — matching the
1166        // documented contract above (`λ̂_i = ‖a_i‖·λ_i`). `lambda_active_true`
1167        // here is the RAW multiplier, so without the `‖a_i‖` factor this would
1168        // understate complementarity by `1/‖a_i‖` on high-norm rows (e.g. a
1169        // B-spline endpoint-derivative clamp, ‖a‖ ≈ 38).
1170        let norm_i = working_constraints
1171            .a
1172            .row(i)
1173            .dot(&working_constraints.a.row(i))
1174            .sqrt();
1175        complementarity = complementarity.max((norm_i * lambda[i] * slack[i]).abs());
1176    }
1177    let stationarity = {
1178        let mut resid = gradient.to_owned();
1179        resid -= &working_constraints.a.t().dot(&lambda);
1180        resid.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()))
1181    };
1182
1183    Ok(ConstraintKktDiagnostics {
1184        n_constraints: n_total_constraints,
1185        n_active: m,
1186        primal_feasibility,
1187        dual_feasibility,
1188        complementarity,
1189        stationarity,
1190        active_tolerance: ACTIVE_SET_PRIMAL_FEASIBILITY_TOL,
1191        // `working_constraints` is the already-rank-reduced compressed
1192        // working set, so by construction `rank(working_constraints.a) ==
1193        // n_active`. Whether the *original* (uncompressed) active set was
1194        // rank-deficient is the caller's responsibility to track when it
1195        // needs to surface that to a downstream gate; here we report the
1196        // post-compression view honestly.
1197        working_set_rank_deficient: false,
1198        gradient_scale: gradient_inf_norm(gradient),
1199    })
1200}
1201
1202fn canonicalize_active_constraint_ids(
1203    x: &Array1<f64>,
1204    constraints: &LinearInequalityConstraints,
1205    active: &[usize],
1206) -> Result<Vec<usize>, EstimationError> {
1207    if active.is_empty() {
1208        return Ok(Vec::new());
1209    }
1210    let compressed_working = compress_active_working_set(x, constraints, active)?;
1211    let mut canonical = Vec::with_capacity(compressed_working.groups.len());
1212    for group in &compressed_working.groups {
1213        if let Some(&active_pos) = group.first() {
1214            canonical.push(active[active_pos]);
1215        }
1216    }
1217    Ok(canonical)
1218}
1219
1220fn fallback_projected_gradient_direction(
1221    x: &Array1<f64>,
1222    d_total: &Array1<f64>,
1223    gradient: &Array1<f64>,
1224    working_constraints: &LinearInequalityConstraints,
1225    constraints: &LinearInequalityConstraints,
1226) -> Result<Option<(Array1<f64>, Vec<usize>)>, EstimationError> {
1227    let p = gradient.len();
1228    if x.len() != p || d_total.len() != p || constraints.a.ncols() != p {
1229        crate::bail_invalid_estim!("projected-gradient fallback dimension mismatch");
1230    }
1231
1232    let tangent_direction = if working_constraints.a.nrows() == 0 {
1233        -gradient
1234    } else {
1235        let identity = Array2::<f64>::eye(p);
1236        let residual = &working_constraints.b - &working_constraints.a.dot(x);
1237        let (direction, _) =
1238            solve_kkt_direction(&identity, gradient, &working_constraints.a, Some(&residual))?;
1239        direction
1240    };
1241
1242    if !array_is_finite(&tangent_direction) {
1243        return Ok(None);
1244    }
1245
1246    let step_inf = tangent_direction
1247        .iter()
1248        .fold(0.0_f64, |acc, &value| acc.max(value.abs()));
1249    if step_inf <= 1e-12 {
1250        // The projected-gradient tangent step has collapsed to ~0: the working
1251        // set holds the gradient stationary, so no descent direction remains in
1252        // the working tangent space. Returning `d_total` as-is is ONLY correct
1253        // when the iterate `x = beta_start + d_total` is itself feasible. When
1254        // `x` is infeasible (an inactive row was violated by an earlier
1255        // `alpha`-clipped step and never repaired — the KKT solve only closes
1256        // residuals on ACTIVE rows), returning `d_total` leaks an infeasible
1257        // iterate that the downstream `check_linear_feasibility` gate rejects
1258        // as an "infeasible iterate" (#1108: the interval-censored survival
1259        // surrogate landed here at scaled-violation 5.7e-3..0.12 while the
1260        // exact projection of the SAME point reaches ~0 violation). Project the
1261        // stationary iterate onto the feasible cone and return the corresponding
1262        // direction so the solve returns a feasible point. The returned result
1263        // is `beta_start + dir`, and `x = beta_start + d_total`, so to return a
1264        // feasible `p` the direction is `dir = d_total + (p - x)`.
1265        let (worst, _) = max_linear_constraint_violation(x, constraints);
1266        if worst > ACTIVE_SET_PRIMAL_FEASIBILITY_TOL {
1267            let projected = project_point_strictly_into_feasible_cone(x, constraints)
1268                .or_else(|| {
1269                    let identity = Array2::<f64>::eye(p);
1270                    solve_quadratic_with_linear_constraints(&identity, x, x, constraints, None)
1271                        .ok()
1272                        .map(|(beta, _active)| beta)
1273                })
1274                .filter(|p_candidate| {
1275                    max_linear_constraint_violation(p_candidate, constraints).0
1276                        <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL
1277                });
1278            let Some(projected) = projected else {
1279                // No feasible repair available — let the caller report honestly
1280                // rather than returning an infeasible direction.
1281                return Ok(None);
1282            };
1283            let repair = &projected - x;
1284            let active = canonicalize_active_constraint_ids(&projected, constraints, &[])?;
1285            return Ok(Some((d_total + &repair, active)));
1286        }
1287        let active = canonicalize_active_constraint_ids(x, constraints, &[])?;
1288        return Ok(Some((d_total.clone(), active)));
1289    }
1290
1291    let directional_derivative = gradient.dot(&tangent_direction);
1292    if !directional_derivative.is_finite() || directional_derivative >= 0.0 {
1293        return Ok(None);
1294    }
1295
1296    let mut alpha = 1.0_f64;
1297    for i in 0..constraints.a.nrows() {
1298        // Scale the step-fraction test by 1/‖a_i‖ so it matches the geometric
1299        // activation tolerance used elsewhere (see in-loop alpha computation in
1300        // `solve_newton_direction_with_linear_constraints_impl`).
1301        let norm = constraints.a.row(i).dot(&constraints.a.row(i)).sqrt();
1302        let inv = if norm > 0.0 { 1.0 / norm } else { 0.0 };
1303        let slack = (constraints.a.row(i).dot(x) - constraints.b[i]) * inv;
1304        let ai_d = constraints.a.row(i).dot(&tangent_direction) * inv;
1305        if let Some(candidate) = boundary_hit_step_fraction(slack, ai_d, alpha) {
1306            alpha = candidate;
1307        }
1308    }
1309    if !alpha.is_finite() || alpha <= 0.0 {
1310        return Ok(None);
1311    }
1312
1313    let fallback_step = tangent_direction * alpha;
1314    let new_x = x + &fallback_step;
1315    // Per-row-scaled feasibility, matching ACTIVE_SET_PRIMAL_FEASIBILITY_TOL.
1316    let (worst, _) = max_linear_constraint_violation(&new_x, constraints);
1317    if worst > ACTIVE_SET_PRIMAL_FEASIBILITY_TOL {
1318        return Ok(None);
1319    }
1320    let active = (0..constraints.a.nrows())
1321        .filter(|&i| scaled_constraint_slack(&new_x, constraints, i) <= 1e-10)
1322        .collect::<Vec<_>>();
1323    let active = canonicalize_active_constraint_ids(&new_x, constraints, &active)?;
1324    Ok(Some((d_total + &fallback_step, active)))
1325}
1326
1327fn log_active_set_transition(
1328    event: &str,
1329    iteration: usize,
1330    active_len: usize,
1331    constraint: Option<usize>,
1332) {
1333    log::debug!(
1334        "[active-set/QP] iter={} event={} active={} constraint={}",
1335        iteration,
1336        event,
1337        active_len,
1338        constraint
1339            .map(|idx| idx.to_string())
1340            .unwrap_or_else(|| "NA".to_string()),
1341    );
1342}
1343
1344/// Record the current working set; returns `false` when this exact set was
1345/// already visited. Both the entering and leaving rules above are
1346/// lowest-index (Bland), so a repeat cannot be a true simplex cycle — it is
1347/// tolerance-band oscillation (a row added by the slack band and dropped by a
1348/// noise-level negative dual, repeatedly). The caller breaks to the post-loop
1349/// KKT exit gate, which inspects the iterate's full KKT residuals and either
1350/// accepts via the projected-gradient fallback or returns an explicit error —
1351/// never a silent acceptance, and no longer a hard seed-killing error on an
1352/// otherwise numerically-converged iterate (#1025).
1353fn record_active_working_set(
1354    visited: &mut HashSet<Vec<usize>>,
1355    active: &[usize],
1356    iteration: usize,
1357) -> bool {
1358    let mut key = active.to_vec();
1359    key.sort_unstable();
1360    if visited.insert(key.clone()) {
1361        return true;
1362    }
1363    log::debug!(
1364        "[active-set/QP] iter={iteration} repeated working set ({} rows); \
1365         deferring to the post-loop KKT exit gate",
1366        key.len()
1367    );
1368    false
1369}
1370
1371fn solve_newton_direction_with_linear_constraints_impl(
1372    hessian: &Array2<f64>,
1373    gradient: &Array1<f64>,
1374    beta: &Array1<f64>,
1375    constraints: &LinearInequalityConstraints,
1376    direction_out: &mut Array1<f64>,
1377    active_hint: Option<&mut Vec<usize>>,
1378    max_iterations: usize,
1379) -> Result<(), EstimationError> {
1380    let p = gradient.len();
1381    if direction_out.len() != p {
1382        *direction_out = Array1::zeros(p);
1383    }
1384    let m = constraints.a.nrows();
1385    if constraints.a.ncols() != p || constraints.b.len() != m || beta.len() != p {
1386        crate::bail_invalid_estim!(
1387            "linear constraint shape mismatch: A={}x{}, b={}, p={}",
1388            constraints.a.nrows(),
1389            constraints.a.ncols(),
1390            constraints.b.len(),
1391            p
1392        );
1393    }
1394
1395    let tol_active = 1e-10;
1396    let tol_step = 1e-12;
1397    let tol_dual = 1e-10;
1398    let mut x = beta.to_owned();
1399    let mut d_total = Array1::<f64>::zeros(p);
1400    let mut g_cur = gradient.to_owned();
1401
1402    let has_active_hint = active_hint
1403        .as_ref()
1404        .map(|hint| !hint.is_empty())
1405        .unwrap_or(false);
1406    if !has_active_hint && solve_newton_direction_dense(hessian, gradient, direction_out).is_ok() {
1407        let candidate = beta + &*direction_out;
1408        let mut feasible = true;
1409        for i in 0..m {
1410            // Scaled (geometric) slack — matches `tol_active`'s semantics and
1411            // the public solver contract on `ACTIVE_SET_PRIMAL_FEASIBILITY_TOL`.
1412            let slack = scaled_constraint_slack(&candidate, constraints, i);
1413            if slack < -tol_active {
1414                feasible = false;
1415                break;
1416            }
1417        }
1418        if feasible {
1419            return Ok(());
1420        }
1421    }
1422
1423    let mut active: Vec<usize> = Vec::new();
1424    let mut is_active = vec![false; m];
1425    if let Some(hint) = active_hint.as_ref() {
1426        for &idx in hint.iter() {
1427            if idx < m && !is_active[idx] {
1428                active.push(idx);
1429                is_active[idx] = true;
1430                log_active_set_transition("warm-add", 0, active.len(), Some(idx));
1431            }
1432        }
1433    }
1434    for i in 0..m {
1435        // Scaled (geometric) slack: a row with ‖a_i‖ ≫ 1 (e.g. a B-spline
1436        // endpoint-derivative clamp at k = 12, ‖a_i‖ ≈ 38) would otherwise
1437        // require a raw slack of `tol_active·‖a_i‖` ≈ 4e-9 to activate, which
1438        // is below LBLT-solve precision on the KKT system and starves the
1439        // active set of rows that genuinely belong on the boundary.
1440        let slack = scaled_constraint_slack(&x, constraints, i);
1441        if slack <= tol_active && !is_active[i] {
1442            active.push(i);
1443            is_active[i] = true;
1444            log_active_set_transition("initial-boundary-add", 0, active.len(), Some(i));
1445        }
1446    }
1447    let mut visited_working_sets: HashSet<Vec<usize>> = HashSet::new();
1448    record_active_working_set(&mut visited_working_sets, &active, 0);
1449
1450    for iteration in 0..max_iterations {
1451        let compressed_working = compress_active_working_set(&x, constraints, &active)?;
1452        let mut residualw = Array1::<f64>::zeros(compressed_working.constraints.a.nrows());
1453        for r in 0..compressed_working.constraints.a.nrows() {
1454            residualw[r] = compressed_working.constraints.b[r]
1455                - compressed_working.constraints.a.row(r).dot(&x);
1456        }
1457        let (d, lambdaw) = solve_kkt_direction(
1458            hessian,
1459            &g_cur,
1460            &compressed_working.constraints.a,
1461            Some(&residualw),
1462        )?;
1463        let step_norm = d.iter().map(|v| v * v).sum::<f64>().sqrt();
1464        if step_norm <= tol_step {
1465            // A "stationary" iterate is only a genuine KKT point if it is
1466            // also primal-feasible on the FULL constraint set. The
1467            // "blocking-add" loop further down only catches rows that
1468            // become tight DURING a non-degenerate step, so a tiny step
1469            // entered with a residual violation on an inactive row would
1470            // otherwise leak an infeasible direction through this
1471            // short-circuit unchecked. Grow the active set with the
1472            // most-violated inactive row and re-solve; `residualw` for that
1473            // row will be non-zero on the next KKT solve, so the resulting
1474            // step is non-degenerate.
1475            let (worst, worst_row) = max_linear_constraint_violation(&x, constraints);
1476            if worst > ACTIVE_SET_PRIMAL_FEASIBILITY_TOL && !is_active[worst_row] {
1477                active.push(worst_row);
1478                is_active[worst_row] = true;
1479                log_active_set_transition(
1480                    "stationary-infeasible-add",
1481                    iteration,
1482                    active.len(),
1483                    Some(worst_row),
1484                );
1485                if !record_active_working_set(&mut visited_working_sets, &active, iteration) {
1486                    break;
1487                }
1488                continue;
1489            }
1490            if worst > ACTIVE_SET_PRIMAL_FEASIBILITY_TOL {
1491                // The worst-violating row is already in the active set —
1492                // the KKT step failed to restore its feasibility, typically
1493                // because a previous iteration's `alpha`-clip prevented the
1494                // full residual closure. Fall through to the post-loop exit
1495                // gate, which inspects the iterate's full KKT residuals and
1496                // either tries the projected-gradient fallback or returns
1497                // an explicit error. Both are correct outcomes; silently
1498                // returning the infeasible direction is not.
1499                break;
1500            }
1501            if compressed_working.groups.is_empty() {
1502                direction_out.assign(&d_total);
1503                return Ok(());
1504            }
1505            let remove_pos = compressed_working
1506                .reconstructed_active_multipliers(&lambdaw)
1507                .into_iter()
1508                .filter(|&(_, lambda_true)| lambda_true < -tol_dual)
1509                .min_by_key(|(active_pos, _)| (active[*active_pos], *active_pos))
1510                .map(|(active_pos, _)| active_pos);
1511            if let Some(active_pos) = remove_pos {
1512                let idx = active.remove(active_pos);
1513                is_active[idx] = false;
1514                log_active_set_transition(
1515                    "remove-negative-dual",
1516                    iteration,
1517                    active.len(),
1518                    Some(idx),
1519                );
1520                if !record_active_working_set(&mut visited_working_sets, &active, iteration) {
1521                    break;
1522                }
1523                continue;
1524            }
1525            if let Some(hint) = active_hint {
1526                hint.clear();
1527                hint.extend(canonicalize_active_constraint_ids(
1528                    &x,
1529                    constraints,
1530                    &active,
1531                )?);
1532            }
1533            direction_out.assign(&d_total);
1534            return Ok(());
1535        }
1536
1537        let mut alpha = 1.0_f64;
1538        for i in 0..m {
1539            if is_active[i] {
1540                continue;
1541            }
1542            // boundary_hit_step_fraction is scale-equivariant in `(slack, ai_d)`:
1543            // scaling both by 1/‖a_i‖ leaves `step = slack / -ai_d` unchanged,
1544            // and its directional-tol/finite checks operate on a max-magnitude
1545            // scale of the same triple. Doing the geometric rescale here keeps
1546            // the step-fraction's "moving toward the boundary" test in the same
1547            // scaled coordinates the activation tests use.
1548            let norm = constraints.a.row(i).dot(&constraints.a.row(i)).sqrt();
1549            let inv = if norm > 0.0 { 1.0 / norm } else { 0.0 };
1550            let slack = (constraints.a.row(i).dot(&x) - constraints.b[i]) * inv;
1551            let ai_d = constraints.a.row(i).dot(&d) * inv;
1552            if let Some(cand) = boundary_hit_step_fraction(slack, ai_d, alpha) {
1553                alpha = cand;
1554            }
1555        }
1556
1557        ndarray::Zip::from(&mut x)
1558            .and(&mut d_total)
1559            .and(&d)
1560            .for_each(|x_i, dt_i, &d_i| {
1561                let alpha_d = alpha * d_i;
1562                *x_i += alpha_d;
1563                *dt_i += alpha_d;
1564            });
1565        g_cur = gradient + &hessian.dot(&d_total);
1566
1567        let mut added_new_active = false;
1568        let mut working_set_repeated = false;
1569        for i in 0..m {
1570            if is_active[i] {
1571                continue;
1572            }
1573            let slack = scaled_constraint_slack(&x, constraints, i);
1574            if slack <= tol_active {
1575                active.push(i);
1576                is_active[i] = true;
1577                added_new_active = true;
1578                log_active_set_transition("blocking-add", iteration, active.len(), Some(i));
1579                working_set_repeated =
1580                    !record_active_working_set(&mut visited_working_sets, &active, iteration);
1581                break;
1582            }
1583        }
1584        if working_set_repeated {
1585            break;
1586        }
1587
1588        if active.is_empty() && !added_new_active {
1589            if let Some(hint) = active_hint {
1590                hint.clear();
1591            }
1592            direction_out.assign(&d_total);
1593            return Ok(());
1594        }
1595    }
1596
1597    let compressed_working = compress_active_working_set(&x, constraints, &active)?;
1598    let mut residualw = Array1::<f64>::zeros(compressed_working.constraints.a.nrows());
1599    for r in 0..compressed_working.constraints.a.nrows() {
1600        residualw[r] =
1601            compressed_working.constraints.b[r] - compressed_working.constraints.a.row(r).dot(&x);
1602    }
1603    let (_, lambdaw) = solve_kkt_direction(
1604        hessian,
1605        &g_cur,
1606        &compressed_working.constraints.a,
1607        Some(&residualw),
1608    )?;
1609    let lambda_true = lambdaw.mapv(|lam_sys| -lam_sys);
1610    let (worst, row) = max_linear_constraint_violation(&x, constraints);
1611    let working_kkt = working_set_kkt_diagnostics_from_multipliers(
1612        &x,
1613        &g_cur,
1614        &compressed_working.constraints,
1615        &lambda_true,
1616        m,
1617    )?;
1618    let kkt = compute_constraint_kkt_diagnostics(&x, &g_cur, constraints);
1619    let grad_inf = gradient_inf_norm(&g_cur);
1620    let stationarity_rel = working_kkt.stationarity / grad_inf.max(1.0);
1621    let step_inf = d_total.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()));
1622    let hd_total = hessian.dot(&d_total);
1623    let predicted_delta = gradient.dot(&d_total)
1624        + 0.5
1625            * d_total
1626                .iter()
1627                .zip(hd_total.iter())
1628                .map(|(a, b)| a * b)
1629                .sum::<f64>();
1630    let kkt_strong_ok = (working_kkt.stationarity <= ACTIVE_SET_KKT_STATIONARITY_TOL
1631        || stationarity_rel <= ACTIVE_SET_KKT_STATIONARITY_TOL)
1632        && working_kkt.complementarity <= ACTIVE_SET_KKT_COMPLEMENTARITY_TOL;
1633    let model_descent_ok =
1634        predicted_delta <= -ACTIVE_SET_MODEL_DESCENT_REL_TOL * (1.0 + grad_inf * step_inf);
1635    let degenerate_boundary_ok = compressed_working.is_degenerate_face()
1636        && worst <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL
1637        && working_kkt.primal_feasibility <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL
1638        && working_kkt.complementarity <= ACTIVE_SET_KKT_COMPLEMENTARITY_TOL
1639        && (working_kkt.stationarity <= ACTIVE_SET_KKT_DEGENERATE_STATIONARITY_TOL
1640            || stationarity_rel <= ACTIVE_SET_KKT_STATIONARITY_TOL);
1641    if worst <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL
1642        && ((working_kkt.dual_feasibility <= ACTIVE_SET_KKT_DUAL_FEASIBILITY_TOL
1643            && (kkt_strong_ok || model_descent_ok))
1644            || degenerate_boundary_ok)
1645    {
1646        if let Some(hint) = active_hint {
1647            hint.clear();
1648            hint.extend(canonicalize_active_constraint_ids(
1649                &x,
1650                constraints,
1651                &active,
1652            )?);
1653        }
1654        direction_out.assign(&d_total);
1655        return Ok(());
1656    }
1657    if let Some((fallback_direction, fallback_active)) = fallback_projected_gradient_direction(
1658        &x,
1659        &d_total,
1660        &g_cur,
1661        &compressed_working.constraints,
1662        constraints,
1663    )? {
1664        if let Some(hint) = active_hint {
1665            hint.clear();
1666            hint.extend(fallback_active);
1667        }
1668        direction_out.assign(&fallback_direction);
1669        return Ok(());
1670    }
1671    Err(EstimationError::ParameterConstraintViolation(format!(
1672        "linear-constrained Newton active-set failed to converge; max(Aβ-b violation)={worst:.3e} at row {row}; KKT[primal={:.3e}, dual={:.3e}, comp={:.3e}, stat={:.3e}, active={}/{}]; diagnostic-reconstruction[dual={:.3e}, stat={:.3e}]",
1673        working_kkt.primal_feasibility,
1674        working_kkt.dual_feasibility,
1675        working_kkt.complementarity,
1676        working_kkt.stationarity,
1677        working_kkt.n_active,
1678        working_kkt.n_constraints,
1679        kkt.dual_feasibility,
1680        kkt.stationarity
1681    )))
1682}
1683
1684pub(crate) fn solve_newton_direction_with_linear_constraints(
1685    hessian: &Array2<f64>,
1686    gradient: &Array1<f64>,
1687    beta: &Array1<f64>,
1688    constraints: &LinearInequalityConstraints,
1689    direction_out: &mut Array1<f64>,
1690    active_hint: Option<&mut Vec<usize>>,
1691) -> Result<(), EstimationError> {
1692    let max_iterations = (gradient.len() + constraints.a.nrows() + 8) * 4;
1693    solve_newton_direction_with_linear_constraints_impl(
1694        hessian,
1695        gradient,
1696        beta,
1697        constraints,
1698        direction_out,
1699        active_hint,
1700        max_iterations,
1701    )
1702}
1703
1704pub fn solve_quadratic_with_linear_constraints(
1705    hessian: &Array2<f64>,
1706    rhs: &Array1<f64>,
1707    beta_start: &Array1<f64>,
1708    constraints: &LinearInequalityConstraints,
1709    warm_active_set: Option<&[usize]>,
1710) -> Result<(Array1<f64>, Vec<usize>), EstimationError> {
1711    if hessian.ncols() != hessian.nrows()
1712        || rhs.len() != hessian.nrows()
1713        || beta_start.len() != hessian.nrows()
1714        || constraints.a.ncols() != hessian.nrows()
1715    {
1716        crate::bail_invalid_estim!("constrained quadratic solve: system dimension mismatch");
1717    }
1718    let gradient = hessian.dot(beta_start) - rhs;
1719    let mut delta = Array1::<f64>::zeros(beta_start.len());
1720    let mut active_hint = warm_active_set.map_or_else(Vec::new, |active| active.to_vec());
1721    solve_newton_direction_with_linear_constraints(
1722        hessian,
1723        &gradient,
1724        beta_start,
1725        constraints,
1726        &mut delta,
1727        Some(&mut active_hint),
1728    )?;
1729    let candidate = beta_start + &delta;
1730    // FINAL FEASIBILITY CONTRACT (#1108). The active-set inner step computes its
1731    // Newton direction only against the ACTIVE working rows and line-searches
1732    // `alpha` against the inactive rows; a row whose approach rate falls inside
1733    // `boundary_hit_step_fraction`'s directional tolerance is not clipped, so the
1734    // returned `candidate` can overshoot a currently-inactive row and land
1735    // outside the cone by a small-but-gate-failing amount (the interval-censored
1736    // survival surrogate leaked 5.5e-3..2.2e-2 raw at cycles 3/9 here, accepted
1737    // into `states`, then rejected by the next cycle's `check_linear_feasibility`
1738    // — `infeasible iterate`). The solver's PUBLIC CONTRACT is a feasible point;
1739    // enforce it at the single chokepoint every caller flows through. A genuinely
1740    // converged feasible solve has `worst ~ 0`, so this is a no-op there and does
1741    // not perturb a well-conditioned constrained fit. When the step did leak,
1742    // project the iterate onto the feasible cone (the exact projection the #1108
1743    // diag proves reaches ~0 violation: the nearest strictly-interior point) and
1744    // return THAT. If no feasible repair is achievable, surface the active-set
1745    // error rather than returning an infeasible point.
1746    let (worst, _) = max_linear_constraint_violation(&candidate, constraints);
1747    if worst <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL {
1748        return Ok((candidate, active_hint));
1749    }
1750    let repaired = project_point_strictly_into_feasible_cone(&candidate, constraints).filter(|p| {
1751        max_linear_constraint_violation(p, constraints).0 <= ACTIVE_SET_PRIMAL_FEASIBILITY_TOL
1752    });
1753    match repaired {
1754        Some(feasible) => {
1755            let active = canonicalize_active_constraint_ids(&feasible, constraints, &[])?;
1756            Ok((feasible, active))
1757        }
1758        None => Err(EstimationError::ParameterConstraintViolation(format!(
1759            "constrained quadratic solve returned an infeasible iterate \
1760             (max scaled violation {worst:.3e}) and no feasible projection could be \
1761             certified onto the constraint cone",
1762        ))),
1763    }
1764}
1765
1766#[cfg(test)]
1767mod tests {
1768    use super::{
1769        ACTIVE_SET_INTERIOR_SEED_MARGIN, LinearInequalityConstraints,
1770        compute_constraint_kkt_diagnostics, project_point_strictly_into_feasible_cone,
1771        project_stationarity_residual_on_constraint_cone,
1772        rank_reduce_rows_pivoted_qr_with_dependence, scaled_constraint_slack,
1773        solve_newton_direction_with_linear_constraints_impl,
1774        solve_quadratic_with_linear_constraints,
1775    };
1776    use approx::assert_relative_eq;
1777    use ndarray::{Array1, Array2, array};
1778
1779    /// A `β = 0` seed sits on the boundary of EVERY row of a homogeneous
1780    /// (`b = 0`) convex/concave second-difference cone — it is the cone vertex.
1781    /// The strict-interior projection must move it to a point with a strictly
1782    /// positive scaled slack on every row, so the inner active-set QP starts
1783    /// from an EMPTY working set rather than an all-rows-active degenerate face
1784    /// (the #873 cache-dependence root cause). The zero seed is the worst case:
1785    /// the nearest interior point is unique up to the margin, and a buggy
1786    /// "min-norm" feasibility fallback would return `0` again.
1787    #[test]
1788    fn strict_interior_projection_lifts_vertex_seed_off_every_constraint_row() {
1789        // Signed second-difference rows of a 5-coefficient concave smooth:
1790        // -(β_{i+2} - 2β_{i+1} + β_i) ≥ 0 for i = 0..3.
1791        let p = 5usize;
1792        let rows = p - 2;
1793        let mut a = Array2::<f64>::zeros((rows, p));
1794        for i in 0..rows {
1795            a[[i, i]] = -1.0;
1796            a[[i, i + 1]] = 2.0;
1797            a[[i, i + 2]] = -1.0;
1798        }
1799        let constraints = LinearInequalityConstraints::new(a, Array1::zeros(rows))
1800            .expect("test constraint shape invariant");
1801
1802        let vertex = Array1::<f64>::zeros(p);
1803        // The vertex is feasible (all rows exactly tight) but on every boundary.
1804        for i in 0..rows {
1805            assert!(
1806                scaled_constraint_slack(&vertex, &constraints, i).abs() < 1e-12,
1807                "vertex seed should sit exactly on row {i}"
1808            );
1809        }
1810
1811        let interior = project_point_strictly_into_feasible_cone(&vertex, &constraints)
1812            .expect("strict-interior projection of the vertex must succeed");
1813        let min_slack = (0..rows)
1814            .map(|i| scaled_constraint_slack(&interior, &constraints, i))
1815            .fold(f64::INFINITY, f64::min);
1816        assert!(
1817            min_slack >= 0.5 * ACTIVE_SET_INTERIOR_SEED_MARGIN,
1818            "projected seed must be strictly interior on every row; min scaled slack = {min_slack:.3e}"
1819        );
1820    }
1821
1822    /// Mirrors `s(x, shape=concave, bc=clamped)`: shape curvature reparameterized
1823    /// to independent coordinate lower bounds `γ_j ≥ 0` (genuine one-sided rows),
1824    /// MERGED with a boundary condition encoded as an anti-parallel inequality
1825    /// PAIR `{r·β ≥ t, −r·β ≥ −t}` (an equality `r·β = t`). A naive
1826    /// shift-every-row-inward projection turns that pair into the empty set
1827    /// `t+δ ≤ r·β ≤ t−δ`, fails, and the caller falls back to the cone vertex —
1828    /// silently reintroducing the #873 seed for the combined case. The
1829    /// anti-parallel-aware margin must leave the equality pair tight while still
1830    /// pushing the genuine shape rows strictly interior.
1831    #[test]
1832    fn strict_interior_projection_keeps_equality_pairs_tight_with_shape_bounds() {
1833        let p = 5usize;
1834        // Rows 0..3: shape lower bounds γ_2,γ_3,γ_4 ≥ 0 (homogeneous, b = 0).
1835        // Rows 3,4: endpoint equality β_0 = 0 as {e_0·β ≥ 0, −e_0·β ≥ 0}.
1836        let m = 3 + 2;
1837        let mut a = Array2::<f64>::zeros((m, p));
1838        a[[0, 2]] = 1.0;
1839        a[[1, 3]] = 1.0;
1840        a[[2, 4]] = 1.0;
1841        a[[3, 0]] = 1.0;
1842        a[[4, 0]] = -1.0;
1843        let constraints = LinearInequalityConstraints::new(a, Array1::zeros(m))
1844            .expect("test constraint shape invariant");
1845
1846        // A seed that violates the shape bounds (negative curvature coords) and
1847        // the equality (β_0 ≠ 0).
1848        let point = Array1::from_vec(vec![0.7, -0.2, -0.5, -0.3, -0.1]);
1849        let seed = project_point_strictly_into_feasible_cone(&point, &constraints).expect(
1850            "strict-interior projection must succeed when an equality pair is present, \
1851             not collapse to the empty set and fall back to the vertex",
1852        );
1853
1854        // Genuine one-sided shape rows are pushed strictly interior.
1855        for i in 0..3 {
1856            assert!(
1857                scaled_constraint_slack(&seed, &constraints, i)
1858                    >= 0.4 * ACTIVE_SET_INTERIOR_SEED_MARGIN,
1859                "shape row {i} not strictly interior: scaled slack = {:.3e}",
1860                scaled_constraint_slack(&seed, &constraints, i)
1861            );
1862        }
1863        // The equality pair stays tight (β_0 ≈ 0), i.e. the seed is projected
1864        // onto the boundary hyperplane rather than shifted off it.
1865        assert!(
1866            seed[0].abs() <= 1e-6,
1867            "boundary equality must be enforced, got β_0 = {:.3e}",
1868            seed[0]
1869        );
1870    }
1871
1872    /// A seed that already carries genuine (concave) curvature and clears the
1873    /// interior margin is returned essentially unchanged — the projection only
1874    /// nudges boundary/violating seeds, it does not discard usable curvature.
1875    #[test]
1876    fn strict_interior_projection_preserves_a_curvature_carrying_seed() {
1877        let p = 5usize;
1878        let rows = p - 2;
1879        let mut a = Array2::<f64>::zeros((rows, p));
1880        for i in 0..rows {
1881            a[[i, i]] = -1.0;
1882            a[[i, i + 1]] = 2.0;
1883            a[[i, i + 2]] = -1.0;
1884        }
1885        let constraints = LinearInequalityConstraints::new(a, Array1::zeros(rows))
1886            .expect("test constraint shape invariant");
1887        // A strictly concave coefficient profile (-(j-2)^2): every second
1888        // difference is -(-2) = +2 > 0 after the concave sign flip, well above
1889        // the interior margin.
1890        let seed = Array1::from_iter((0..p).map(|j| -((j as f64 - 2.0).powi(2))));
1891        let projected = project_point_strictly_into_feasible_cone(&seed, &constraints)
1892            .expect("already-interior seed must project");
1893        let max_move = seed
1894            .iter()
1895            .zip(projected.iter())
1896            .map(|(a, b)| (a - b).abs())
1897            .fold(0.0_f64, f64::max);
1898        assert!(
1899            max_move < 1e-3,
1900            "strictly-interior curvature-carrying seed should be preserved; max move = {max_move:.3e}"
1901        );
1902    }
1903
1904    #[test]
1905    fn maxiter_accepts_current_boundary_solution() {
1906        let hessian = array![[1.0]];
1907        let gradient = array![-1.0];
1908        let beta = array![0.0];
1909        let constraints = LinearInequalityConstraints {
1910            a: array![[-1.0]],
1911            b: array![-0.1],
1912        };
1913        let mut direction = Array1::zeros(1);
1914        let mut active_hint = Vec::new();
1915
1916        solve_newton_direction_with_linear_constraints_impl(
1917            &hessian,
1918            &gradient,
1919            &beta,
1920            &constraints,
1921            &mut direction,
1922            Some(&mut active_hint),
1923            1,
1924        )
1925        .expect("solver should accept the current boundary solution at the iteration limit");
1926
1927        assert_relative_eq!(direction[0], 0.1, epsilon = 1e-12);
1928        assert_eq!(active_hint, vec![0]);
1929    }
1930
1931    #[test]
1932    fn rank_reduce_zero_rows_returns_empty_working_set() {
1933        let a = array![[0.0, 0.0], [0.0, 0.0],];
1934        let b = array![0.0, 0.0];
1935        let groups = vec![vec![0], vec![1]];
1936
1937        let (a_out, b_out, groups_out, _) =
1938            rank_reduce_rows_pivoted_qr_with_dependence(a, b, groups);
1939
1940        assert_eq!(a_out.nrows(), 0);
1941        assert_eq!(a_out.ncols(), 2);
1942        assert_eq!(b_out.len(), 0);
1943        assert!(groups_out.is_empty());
1944    }
1945
1946    #[test]
1947    fn cone_projection_solves_nonnegative_least_squares_not_one_way_pruning() {
1948        let active_a = array![
1949            [0.85258593, -0.77270261],
1950            [-1.22152485, 2.05129351],
1951            [0.22794844, 1.56987265],
1952        ];
1953        let residual = array![-0.50524761, -1.10104911];
1954
1955        let (projected, multipliers) =
1956            project_stationarity_residual_on_constraint_cone(&residual, &active_a)
1957                .expect("cone projection should solve");
1958
1959        let row0 = active_a.row(0);
1960        let expected_mu0 = row0.dot(&residual) / row0.dot(&row0);
1961        assert_relative_eq!(multipliers[0], expected_mu0, epsilon = 1e-8);
1962        assert_relative_eq!(multipliers[1], 0.0, epsilon = 1e-10);
1963        assert_relative_eq!(multipliers[2], 0.0, epsilon = 1e-10);
1964
1965        let raw_norm2 = residual.dot(&residual);
1966        let projected_norm2 = projected.dot(&projected);
1967        assert!(
1968            projected_norm2 < raw_norm2 - 0.1,
1969            "NNLS projection should keep the improving active row: raw={raw_norm2:.6e}, projected={projected_norm2:.6e}"
1970        );
1971        let dual = active_a.dot(&projected);
1972        for (idx, (&mu, &w)) in multipliers.iter().zip(dual.iter()).enumerate() {
1973            if mu <= 1e-10 {
1974                assert!(
1975                    w <= 1e-8,
1976                    "inactive cone generator {idx} has positive reduced gradient {w:.3e}"
1977                );
1978            }
1979        }
1980    }
1981
1982    // #500: the KKT primal residual must be the *geometric* distance to the
1983    // constraint hyperplane — invariant to how the constraint row is scaled.
1984    // A B-spline endpoint-derivative clamp carries a large row norm, so the
1985    // raw slack `a·β − b` of a near-feasible iterate is inflated by ‖a‖ and a
1986    // downstream raw primal gate would spuriously refuse it. The same geometry
1987    // expressed with a unit-norm row must yield the same primal.
1988    #[test]
1989    fn kkt_primal_is_per_row_scale_invariant() {
1990        // β sits 2.071e-8 on the infeasible side of the hyperplane `row·β ≥ 0`
1991        // (the exact geometric residual reported in #500's startup abort).
1992        let geometric_violation = 2.071e-8_f64;
1993        let gradient = Array1::<f64>::zeros(2);
1994
1995        // Unit-norm row: raw slack == geometric distance.
1996        let beta_unit = array![-geometric_violation, 0.0];
1997        let unit = LinearInequalityConstraints {
1998            a: array![[1.0, 0.0]],
1999            b: array![0.0],
2000        };
2001        let diag_unit = compute_constraint_kkt_diagnostics(&beta_unit, &gradient, &unit);
2002
2003        // Same hyperplane, row scaled ×1000: raw slack would be 2.071e-5, but
2004        // the *scaled* primal must still equal the geometric distance.
2005        let beta_big = array![-geometric_violation, 0.0];
2006        let big = LinearInequalityConstraints {
2007            a: array![[1000.0, 0.0]],
2008            b: array![0.0],
2009        };
2010        let diag_big = compute_constraint_kkt_diagnostics(&beta_big, &gradient, &big);
2011
2012        assert_relative_eq!(
2013            diag_unit.primal_feasibility,
2014            geometric_violation,
2015            epsilon = 1e-14
2016        );
2017        assert_relative_eq!(
2018            diag_big.primal_feasibility,
2019            geometric_violation,
2020            epsilon = 1e-14
2021        );
2022        // The scaled diagnostic must NOT report the ‖a‖-inflated raw slack.
2023        assert!(
2024            diag_big.primal_feasibility < 1e-7,
2025            "scaled primal {:.3e} should pass a 1e-7 gate; raw slack would be {:.3e}",
2026            diag_big.primal_feasibility,
2027            1000.0 * geometric_violation
2028        );
2029    }
2030
2031    // A B-spline `bc=clamped`/`bc=anchored` constraint is an EQUALITY
2032    // `a·β = b` encoded as two opposing inequalities `a·β ≥ b` and
2033    // `−a·β ≥ −b`. The active-set solver must drive the unconstrained
2034    // optimum back onto the hyperplane `a·β = b`. This is the isolated
2035    // analogue of the `bc=clamped` startup-validation abort: the exact
2036    // validation solve left `a·β ≈ 7.76` instead of 0, so the KKT primal
2037    // residual blew past tolerance and every seed was refused.
2038    #[test]
2039    fn opposing_inequality_pair_pins_equality_to_target() {
2040        // Minimize ½‖β‖² − rhs·β  (H = I) ⇒ unconstrained optimum β* = rhs.
2041        // rhs = [5,5,0,0] ⇒ a·β* = 10 with a = [1,1,0,0].
2042        // The opposing pair must pull a·β back to the target 0.
2043        let hessian = array![
2044            [1.0, 0.0, 0.0, 0.0],
2045            [0.0, 1.0, 0.0, 0.0],
2046            [0.0, 0.0, 1.0, 0.0],
2047            [0.0, 0.0, 0.0, 1.0],
2048        ];
2049        let rhs = array![5.0, 5.0, 0.0, 0.0];
2050        let beta_start = Array1::<f64>::zeros(4);
2051        let constraints = LinearInequalityConstraints {
2052            a: array![[1.0, 1.0, 0.0, 0.0], [-1.0, -1.0, 0.0, 0.0]],
2053            b: array![0.0, 0.0],
2054        };
2055
2056        let (beta, _active) = solve_quadratic_with_linear_constraints(
2057            &hessian,
2058            &rhs,
2059            &beta_start,
2060            &constraints,
2061            None,
2062        )
2063        .expect("opposing-inequality equality QP must solve");
2064
2065        let a_dot_beta = beta[0] + beta[1];
2066        assert!(
2067            a_dot_beta.abs() < 1e-8,
2068            "opposing inequalities must pin a·β to 0, got {a_dot_beta:.6e} (β = {beta:?})"
2069        );
2070    }
2071
2072    // Same as above but with a non-zero target and a large row norm — the
2073    // exact shape of a B-spline endpoint-derivative clamp, whose rows carry
2074    // ‖a‖ ≫ 1. The equality must still be pinned in geometric coordinates.
2075    #[test]
2076    fn opposing_inequality_pair_pins_scaled_equality_to_nonzero_target() {
2077        let hessian = array![
2078            [1.0, 0.0, 0.0, 0.0],
2079            [0.0, 1.0, 0.0, 0.0],
2080            [0.0, 0.0, 1.0, 0.0],
2081            [0.0, 0.0, 0.0, 1.0],
2082        ];
2083        let rhs = array![5.0, 5.0, 0.0, 0.0];
2084        let beta_start = Array1::<f64>::zeros(4);
2085        // Row scaled ×1000 (mimics a derivative-clamp row norm) with target 3000
2086        // ⇒ geometric target a·β = 3.0 in unit coordinates.
2087        let constraints = LinearInequalityConstraints {
2088            a: array![[1000.0, 1000.0, 0.0, 0.0], [-1000.0, -1000.0, 0.0, 0.0]],
2089            b: array![3000.0, -3000.0],
2090        };
2091
2092        let (beta, _active) = solve_quadratic_with_linear_constraints(
2093            &hessian,
2094            &rhs,
2095            &beta_start,
2096            &constraints,
2097            None,
2098        )
2099        .expect("scaled opposing-inequality equality QP must solve");
2100
2101        let a_dot_beta = 1000.0 * (beta[0] + beta[1]);
2102        assert!(
2103            (a_dot_beta - 3000.0).abs() < 1e-5,
2104            "opposing inequalities must pin a·β to 3000, got {a_dot_beta:.6e} (β = {beta:?})"
2105        );
2106    }
2107
2108    // `bc=clamped` at BOTH ends produces TWO opposing-inequality equalities
2109    // (4 rows total). The real abort reports `active=2/4` — only ONE of the
2110    // two equalities is being pinned. Reproduce two independent equalities
2111    // and require BOTH to be driven to their targets.
2112    #[test]
2113    fn two_opposing_inequality_equalities_both_pinned() {
2114        let hessian = array![
2115            [1.0, 0.0, 0.0, 0.0],
2116            [0.0, 1.0, 0.0, 0.0],
2117            [0.0, 0.0, 1.0, 0.0],
2118            [0.0, 0.0, 0.0, 1.0],
2119        ];
2120        let rhs = array![5.0, 5.0, 5.0, 5.0];
2121        let beta_start = Array1::<f64>::zeros(4);
2122        // Equality A: β0 + β1 = 0 (rows 0,1). Equality B: β2 + β3 = 0 (rows 2,3).
2123        let constraints = LinearInequalityConstraints {
2124            a: array![
2125                [1.0, 1.0, 0.0, 0.0],
2126                [-1.0, -1.0, 0.0, 0.0],
2127                [0.0, 0.0, 1.0, 1.0],
2128                [0.0, 0.0, -1.0, -1.0],
2129            ],
2130            b: array![0.0, 0.0, 0.0, 0.0],
2131        };
2132
2133        let (beta, _active) = solve_quadratic_with_linear_constraints(
2134            &hessian,
2135            &rhs,
2136            &beta_start,
2137            &constraints,
2138            None,
2139        )
2140        .expect("two-equality QP must solve");
2141
2142        assert!(
2143            (beta[0] + beta[1]).abs() < 1e-8,
2144            "equality A not pinned: β0+β1 = {:.6e}",
2145            beta[0] + beta[1]
2146        );
2147        assert!(
2148            (beta[2] + beta[3]).abs() < 1e-8,
2149            "equality B not pinned: β2+β3 = {:.6e}",
2150            beta[2] + beta[3]
2151        );
2152    }
2153
2154    // Faithful to the failing fit: the penalized IRLS Hessian `X'WX + λS`
2155    // with λ at the over-smoothing ceiling is severely ill-conditioned — the
2156    // penalty `S` is rank-deficient (null space = the unpenalized polynomial
2157    // part), so directions in null(S) are governed by a tiny `X'WX` block
2158    // while penalized directions carry a huge λ. The opposing-inequality
2159    // equalities must STILL be pinned under this conditioning.
2160    #[test]
2161    fn opposing_inequality_equalities_pinned_under_ill_conditioned_penalty() {
2162        // H = diag(1, 1, λ, λ) with λ = 1e8 — penalized directions 2,3 are
2163        // ~1e8 stiffer than the data directions 0,1.
2164        let lam = 1.0e8_f64;
2165        let hessian = array![
2166            [1.0, 0.0, 0.0, 0.0],
2167            [0.0, 1.0, 0.0, 0.0],
2168            [0.0, 0.0, lam, 0.0],
2169            [0.0, 0.0, 0.0, lam],
2170        ];
2171        let rhs = array![5.0, 5.0, 5.0, 5.0];
2172        let beta_start = Array1::<f64>::zeros(4);
2173        // Two equalities that COUPLE a stiff and a soft coordinate, like a
2174        // B-spline derivative row spanning penalized and unpenalized parts:
2175        // A: β0 + β2 = 0, B: β1 + β3 = 0.
2176        let constraints = LinearInequalityConstraints {
2177            a: array![
2178                [1.0, 0.0, 1.0, 0.0],
2179                [-1.0, 0.0, -1.0, 0.0],
2180                [0.0, 1.0, 0.0, 1.0],
2181                [0.0, -1.0, 0.0, -1.0],
2182            ],
2183            b: array![0.0, 0.0, 0.0, 0.0],
2184        };
2185
2186        let (beta, _active) = solve_quadratic_with_linear_constraints(
2187            &hessian,
2188            &rhs,
2189            &beta_start,
2190            &constraints,
2191            None,
2192        )
2193        .expect("ill-conditioned two-equality QP must solve");
2194
2195        assert!(
2196            (beta[0] + beta[2]).abs() < 1e-6,
2197            "equality A not pinned under ill-conditioning: β0+β2 = {:.6e}",
2198            beta[0] + beta[2]
2199        );
2200        assert!(
2201            (beta[1] + beta[3]).abs() < 1e-6,
2202            "equality B not pinned under ill-conditioning: β1+β3 = {:.6e}",
2203            beta[1] + beta[3]
2204        );
2205    }
2206}