gam-terms 0.3.127

//! Constant-curvature (`M_κ`) smooth term: basis + penalty over the
//! κ-stereographic chart (#944, stage 3 step 1).
//!
//! The term is the κ-generic sibling of the intrinsic-S² Wahba smooth
//! (`sphere_spec.rs` / `build_spherical_spline_basis`): a reproducing-kernel
//! basis on a center set, with the kernel Gram on the centers as the RKHS
//! roughness penalty and a coefficient-space sum-to-zero constraint for
//! identifiability. Where the Wahba smooth hard-codes S² (lat/lon chart,
//! Legendre kernels), this term takes the geometry from
//! [`gam_geometry::constant_curvature::ConstantCurvature`] at an explicit
//! curvature κ, so one construction covers the whole interpolation
//! `S^d(1/√κ) → ℝ^d → H^d(1/√−κ)` through κ = 0.
//!
//! # Kernel
//!
//! `K_κ(x, y) = exp(−d_κ(x, y) / ℓ)` — the geodesic-exponential kernel, where
//! `d_κ` is the exact constant-curvature geodesic distance in the
//! κ-stereographic chart. The geodesic distance is a kernel of conditionally
//! negative type on all three constant-curvature space forms (Schoenberg 1942
//! for `S^d`; classical CND of `‖·‖` on `ℝ^d`; Faraut–Harzallah 1974 for
//! `H^d`), so `exp(−c·d_κ)` is positive definite for every `c > 0` and every
//! κ — the Gram on distinct centers is strictly PD, which is exactly what the
//! RKHS penalty construction needs. At κ = 0 the chart carries the doubled
//! gauge (`metric 4δ`, `d_0(x, y) = 2‖x − y‖`), so the κ = 0 term is the
//! Euclidean exponential (Matérn-½) kernel smooth with effective Euclidean
//! range `ℓ/2`.
//!
//! # κ-differentiability contract (what the ψ-channel stage consumes)
//!
//! Every κ-moving piece of this construction is differentiable in κ via the
//! exact κ-jets landed in stage 2, and every κ-FIXED piece is documented as
//! such so the later ψ-channel wiring (`∂X/∂κ`, `∂S/∂κ` into the LAML outer
//! gradient, Matérn iso-κ optimizer as the template) needs no new calculus:
//!
//! - **Centers are κ-fixed.** Center selection runs in chart coordinates
//!   (farthest-point / k-means / user-provided) and deliberately does NOT
//!   consult κ, so `∂(centers)/∂κ ≡ 0` and the design moves with κ only
//!   through the kernel. A κ-dependent center rule would add an
//!   uncontrolled, non-smooth term to the design drift.
//! - **The length scale ℓ is κ-fixed.** The auto-initialized ℓ is derived
//!   from chart-coordinate (κ = 0 gauge) center spacing only, and an
//!   explicit user ℓ is a constant. `∂ℓ/∂κ ≡ 0`.
//! - **The constraint transform `z` is κ-fixed.** Uniform coefficient
//!   weights; at fit time the global identifiability pipeline composes the
//!   parametric orthogonalization onto it and the result is FROZEN
//!   (mirroring `SphericalSplineIdentifiability::FrozenTransform`, #532), so
//!   the predict/ψ-trial rebuild replays the same `z` verbatim.
//! - **The kernel has exact κ-jets.** `∂K/∂κ` and `∂²K/∂κ²` follow from
//!   `distance_kappa_jet` (Tower4-exact, FD-gated) by the chain rule — see
//!   [`constant_curvature_kernel_kappa_jets`]. Therefore:
//!   `∂X_raw/∂κ = ∂K(data, centers)/∂κ`, realized design drift
//!   `∂X/∂κ = (∂K/∂κ)·z`, and penalty drift `∂S_raw/∂κ = zᵀ(∂K(centers,
//!   centers)/∂κ)z` are all available in closed form from this module today.
//!   (The penalty handed to the optimizer is Frobenius-normalized; the
//!   ψ-channel must route its κ-derivative through the same normalization
//!   rule — `normalize_penaltywith_psi_derivatives` is the existing seam.)
//! - **Available but not yet consumed:** `log_map_kappa_jet` /
//!   `exp_map_kappa_jet` cover future geodesic/normal-coordinate basis
//!   variants (e.g. tangent-space designs); the distance jet is the only one
//!   this kernel construction needs.

use ndarray::{Array1, Array2, ArrayView1, ArrayView2, Axis};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};

use gam_geometry::constant_curvature::{ConstantCurvature, distance_kappa_jet};

use super::{
    BasisBuildResult, BasisError, BasisMetadata, BasisPsiDerivativeBundle,
    BasisPsiDerivativeResult, BasisPsiSecondDerivativeResult, CenterStrategy, PenaltyCandidate,
    PenaltyInfo, PenaltySource, filter_active_penalty_candidates_with_ops, normalize_penalty,
    select_centers_by_strategy, weighted_coefficient_sum_to_zero_transform,
};

/// Realized-design identifiability policy for the constant-curvature smooth.
/// Mirrors [`super::SphericalSplineIdentifiability`] (#532): the fit-time
/// center-space sum-to-zero `z` gets the parametric orthogonalization composed
/// onto it by the global identifiability pipeline, and the composed transform
/// is frozen here so predict-time (and future per-ψ-trial) rebuilds replay it
/// verbatim instead of recomputing `z` from the centers.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub enum ConstantCurvatureIdentifiability {
    /// Fit-time default: uniform-weight coefficient sum-to-zero over the
    /// centers (`Σ_j α_j = 0`), then global parametric residualization.
    #[default]
    CenterSumToZero,
    /// Predict-time replay: the frozen composed transform captured at fit
    /// time. `transform.nrows()` equals the number of centers.
    FrozenTransform { transform: Array2<f64> },
}

/// Constant-curvature smooth configuration (`curv(x, z, kappa = …)`).
///
/// The chart inputs are the raw feature columns interpreted as
/// κ-stereographic chart coordinates: any finite point for κ ≥ 0, the open
/// ball `‖x‖ < 1/√(−κ)` for κ < 0. The default κ = 0 reproduces a Euclidean
/// exponential-kernel smooth (in the doubled κ = 0 chart gauge), so the term
/// is safe to use as a drop-in flat smooth until κ becomes a fitted
/// ψ-coordinate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstantCurvatureBasisSpec {
    /// Center/knot selection strategy in chart coordinates. Deliberately
    /// κ-independent (see the module-level κ-contract).
    pub center_strategy: CenterStrategy,
    /// Sectional curvature κ of the latent/feature geometry. Fixed at build
    /// time; the later ψ-channel stage promotes it to a fitted outer
    /// coordinate consuming this module's exact κ-jets.
    pub kappa: f64,
    /// Geodesic kernel range ℓ in `K_κ = exp(−d_κ/ℓ)`. The `0.0` sentinel
    /// requests the κ-independent auto initialization
    /// ([`realized_constant_curvature_length_scale`]); the realized value is
    /// persisted in [`BasisMetadata::ConstantCurvature`] and frozen back into
    /// the spec for predict-time replay.
    pub length_scale: f64,
    /// Add the ridge-like shrinkage penalty alongside the RKHS Gram penalty.
    pub double_penalty: bool,
    /// Realized-design identifiability policy (see type docs).
    #[serde(default)]
    pub identifiability: ConstantCurvatureIdentifiability,
}

impl Default for ConstantCurvatureBasisSpec {
    fn default() -> Self {
        Self {
            center_strategy: CenterStrategy::FarthestPoint { num_centers: 50 },
            kappa: 0.0,
            length_scale: 0.0,
            // No double-penalty ridge by default (#1464). The RKHS Gram penalty
            // zᵀKz is strictly PD/full-rank on distinct centers, so it already
            // regularizes every coefficient direction — the ridge `I` adds no
            // stability. Worse, `I` is curvature-BLIND: with its own λ it absorbs
            // the data fit independently of κ, so the κ outer coordinate sees only
            // the monotone Occam term (positive κ compresses geodesic distances →
            // kernel log-det shrinks) and rails to the +chart bound for any curved
            // data, recovering hyperbolic truth as spherical. Dropping the ridge
            // matches the single-penalty profiled-REML oracle
            // (`profiled_reml_identifies_curvature_sign_with_effective_length`),
            // which identifies the curvature SIGN.
            double_penalty: false,
            identifiability: ConstantCurvatureIdentifiability::CenterSumToZero,
        }
    }
}

/// Validate that every row of `points` is finite and inside the
/// κ-stereographic chart (`1 + κ‖x‖² > 0`; automatic for κ ≥ 0, the open-ball
/// constraint for κ < 0).
pub(crate) fn validate_chart_points(
    points: ArrayView2<'_, f64>,
    kappa: f64,
    what: &str,
) -> Result<(), BasisError> {
    for (i, row) in points.outer_iter().enumerate() {
        let mut nx2 = 0.0_f64;
        for &v in row.iter() {
            if !v.is_finite() {
                crate::bail_invalid_basis!(
                    "constant-curvature {what} row {i} has a non-finite coordinate"
                );
            }
            nx2 += v * v;
        }
        if 1.0 + kappa * nx2 <= 0.0 {
            crate::bail_invalid_basis!(
                "constant-curvature {what} row {i} lies outside the κ-stereographic chart \
                 (need 1 + κ·‖x‖² > 0; got κ = {kappa}, ‖x‖² = {nx2}); for κ < 0 the chart is \
                 the open ball ‖x‖ < 1/√(−κ)"
            );
        }
    }
    Ok(())
}

/// `K_κ(data, centers)` — the geodesic-exponential kernel matrix
/// `exp(−d_κ(x_i, c_j)/ℓ)`.
pub fn constant_curvature_kernel_matrix(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    kappa: f64,
    length_scale: f64,
) -> Result<Array2<f64>, BasisError> {
    if data.ncols() != centers.ncols() {
        crate::bail_dim_basis!(
            "constant-curvature kernel dimension mismatch: data d={} centers d={}",
            data.ncols(),
            centers.ncols()
        );
    }
    if !(length_scale.is_finite() && length_scale > 0.0) {
        crate::bail_invalid_basis!(
            "constant-curvature kernel needs a positive finite length_scale; got {length_scale}"
        );
    }
    validate_chart_points(data, kappa, "data")?;
    validate_chart_points(centers, kappa, "centers")?;
    let manifold = ConstantCurvature::new(data.ncols(), kappa);
    let mut out = Array2::<f64>::zeros((data.nrows(), centers.nrows()));
    out.axis_iter_mut(Axis(0))
        .into_par_iter()
        .enumerate()
        .try_for_each(|(i, mut row)| -> Result<(), BasisError> {
            for (j, c) in centers.outer_iter().enumerate() {
                let d = manifold.distance(data.row(i), c).map_err(|e| {
                    BasisError::InvalidInput(format!(
                        "constant-curvature distance failed at (row {i}, center {j}): {e}"
                    ))
                })?;
                row[j] = (-d / length_scale).exp();
            }
            Ok(())
        })?;
    Ok(out)
}

/// `(K, ∂K/∂κ, ∂²K/∂κ²)` of the raw (pre-constraint) kernel matrix — the
/// ψ-channel hook. Exact: rides `distance_kappa_jet` (Tower4, FD-gated in
/// `geometry::constant_curvature`) through the chain rule for
/// `K = exp(−d/ℓ)` at κ-FIXED ℓ and centers (see the module κ-contract):
///
/// ```text
///   ∂K/∂κ  = −(d′/ℓ) · K
///   ∂²K/∂κ² = ((d′/ℓ)² − d″/ℓ) · K
/// ```
///
/// The realized design/penalty drifts follow by the κ-fixed transforms:
/// `∂X/∂κ = (∂K/∂κ)·z` and `∂S_raw/∂κ = zᵀ(∂K/∂κ)z` (centers×centers), with
/// the Frobenius penalty normalization differentiated by the existing
/// `normalize_penaltywith_psi_derivatives` seam.
pub fn constant_curvature_kernel_kappa_jets(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    kappa: f64,
    length_scale: f64,
) -> Result<(Array2<f64>, Array2<f64>, Array2<f64>), BasisError> {
    if data.ncols() != centers.ncols() {
        crate::bail_dim_basis!(
            "constant-curvature kernel-jet dimension mismatch: data d={} centers d={}",
            data.ncols(),
            centers.ncols()
        );
    }
    if !(length_scale.is_finite() && length_scale > 0.0) {
        crate::bail_invalid_basis!(
            "constant-curvature kernel jets need a positive finite length_scale; got {length_scale}"
        );
    }
    validate_chart_points(data, kappa, "data")?;
    validate_chart_points(centers, kappa, "centers")?;
    let manifold = ConstantCurvature::new(data.ncols(), kappa);
    let n = data.nrows();
    let m = centers.nrows();
    let mut value = Array2::<f64>::zeros((n, m));
    let mut dk = Array2::<f64>::zeros((n, m));
    let mut dkk = Array2::<f64>::zeros((n, m));
    let rows: Vec<(usize, Vec<(f64, f64, f64)>)> = (0..n)
        .into_par_iter()
        .map(|i| -> Result<(usize, Vec<(f64, f64, f64)>), BasisError> {
            let mut row = Vec::with_capacity(m);
            for (j, c) in centers.outer_iter().enumerate() {
                let (d, d1, d2) = distance_kappa_jet(&manifold, data.row(i), c).map_err(|e| {
                    BasisError::InvalidInput(format!(
                        "constant-curvature distance κ-jet failed at (row {i}, center {j}): {e}"
                    ))
                })?;
                let k = (-d / length_scale).exp();
                let g = d1 / length_scale;
                row.push((k, -g * k, (g * g - d2 / length_scale) * k));
            }
            Ok((i, row))
        })
        .collect::<Result<Vec<_>, BasisError>>()?;
    for (i, row) in rows {
        for (j, (k, k1, k2)) in row.into_iter().enumerate() {
            value[(i, j)] = k;
            dk[(i, j)] = k1;
            dkk[(i, j)] = k2;
        }
    }
    Ok((value, dk, dkk))
}

/// `(K, ∂K/∂κ, ∂²K/∂κ²)` of the raw kernel matrix when the kernel uses the
/// fill-invariant effective length `L(κ)` (the #944 fix: `L` solves the fill
/// target `g(L,κ)=fill⋆`, holding the kernel's effective DoF κ-invariant). Both
/// the geodesic distance `d_κ` and the length `L(κ)` move with κ, so the exponent
/// is the quotient `q = d/L` and the chain rule carries both jets:
///
/// ```text
///   q  = d / L
///   q′ = d′/L − d·L′/L²
///   q″ = d″/L − 2 d′ L′/L² − d L″/L² + 2 d (L′)²/L³
///   K = e^{−q},  K′ = −q′K,  K″ = ((q′)² − q″) K
/// ```
///
/// `l_jet = (L, L′, L″)` is the effective-length κ-jet from
/// [`constant_curvature_effective_length_jet`]; at κ = 0 it reduces to the
/// fixed-ℓ jets (`L′ = L″` terms vanish only if the geometry is flat, but the
/// formula is exact for all κ).
pub(crate) fn constant_curvature_kernel_kappa_jets_scaled(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    kappa: f64,
    l_jet: (f64, f64, f64),
) -> Result<(Array2<f64>, Array2<f64>, Array2<f64>), BasisError> {
    if data.ncols() != centers.ncols() {
        crate::bail_dim_basis!(
            "constant-curvature scaled kernel-jet dimension mismatch: data d={} centers d={}",
            data.ncols(),
            centers.ncols()
        );
    }
    let (l, l1, l2) = l_jet;
    if !(l.is_finite() && l > 0.0) {
        crate::bail_invalid_basis!(
            "constant-curvature scaled kernel jets need a positive finite effective length; got {l}"
        );
    }
    validate_chart_points(data, kappa, "data")?;
    validate_chart_points(centers, kappa, "centers")?;
    let manifold = ConstantCurvature::new(data.ncols(), kappa);
    let n = data.nrows();
    let m = centers.nrows();
    let mut value = Array2::<f64>::zeros((n, m));
    let mut dk = Array2::<f64>::zeros((n, m));
    let mut dkk = Array2::<f64>::zeros((n, m));
    let rows: Vec<(usize, Vec<(f64, f64, f64)>)> = (0..n)
        .into_par_iter()
        .map(|i| -> Result<(usize, Vec<(f64, f64, f64)>), BasisError> {
            let mut row = Vec::with_capacity(m);
            for (j, c) in centers.outer_iter().enumerate() {
                let (d, d1, d2) = distance_kappa_jet(&manifold, data.row(i), c).map_err(|e| {
                    BasisError::InvalidInput(format!(
                        "constant-curvature scaled distance κ-jet failed at (row {i}, center {j}): {e}"
                    ))
                })?;
                let q = d / l;
                let q1 = d1 / l - d * l1 / (l * l);
                let q2 = d2 / l - 2.0 * d1 * l1 / (l * l) - d * l2 / (l * l)
                    + 2.0 * d * l1 * l1 / (l * l * l);
                let k = (-q).exp();
                row.push((k, -q1 * k, (q1 * q1 - q2) * k));
            }
            Ok((i, row))
        })
        .collect::<Result<Vec<_>, BasisError>>()?;
    for (i, row) in rows {
        for (j, (k, k1, k2)) in row.into_iter().enumerate() {
            value[(i, j)] = k;
            dk[(i, j)] = k1;
            dkk[(i, j)] = k2;
        }
    }
    Ok((value, dk, dkk))
}

/// Resolve the realized kernel range ℓ. An explicit positive `spec_length_scale`
/// is used verbatim; the `0.0` sentinel auto-initializes from the median
/// pairwise CHART distance among the centers, doubled to match the κ = 0
/// chart gauge (`d_0 = 2‖Δ‖`).
///
/// κ-contract: the auto rule reads chart coordinates only — it never consults
/// κ — so the realized ℓ is a κ-CONSTANT and contributes no `∂ℓ/∂κ` term to
/// the design drift.
pub fn realized_constant_curvature_length_scale(
    centers: ArrayView2<'_, f64>,
    spec_length_scale: f64,
) -> Result<f64, BasisError> {
    if spec_length_scale.is_finite() && spec_length_scale > 0.0 {
        return Ok(spec_length_scale);
    }
    if spec_length_scale != 0.0 {
        crate::bail_invalid_basis!(
            "constant-curvature length_scale must be positive (or 0.0 for auto); got {spec_length_scale}"
        );
    }
    let m = centers.nrows();
    if m < 2 {
        return Err(BasisError::InsufficientColumnsForConstraint { found: m });
    }
    let mut dists: Vec<f64> = Vec::with_capacity(m * (m - 1) / 2);
    for i in 0..m {
        for j in (i + 1)..m {
            let mut s = 0.0_f64;
            for k in 0..centers.ncols() {
                let dlt = centers[(i, k)] - centers[(j, k)];
                s += dlt * dlt;
            }
            dists.push(2.0 * s.sqrt());
        }
    }
    dists.sort_by(|a, b| a.partial_cmp(b).expect("finite chart distances"));
    let median = dists[dists.len() / 2];
    if !(median.is_finite() && median > 0.0) {
        crate::bail_invalid_basis!(
            "constant-curvature auto length_scale failed: centers are degenerate \
             (median pairwise chart distance = {median})"
        );
    }
    Ok(median)
}

/// Reference kernel "fill" `fill⋆` — the κ = 0 mean data→center kernel entry
/// `(1/N) Σᵢⱼ exp(−d₀(xᵢ,cⱼ)/ℓ_ref)` with `d₀ = 2‖Δ‖` the κ = 0 chart gauge.
///
/// The fill is the scalar that measures the kernel's *effective resolution* (how
/// much each data row "sees" the centers): it is monotone in `ℓ/scale`, so
/// pinning it across κ pins the realized design's flexibility (its effective
/// degrees of freedom). [`constant_curvature_effective_length_jet`] solves
/// `g(L,κ) = fill⋆` for `L(κ)` so the fill — hence the basis flexibility — stays
/// κ-invariant and only the distance-matrix SHAPE (the genuine curvature signal)
/// moves with κ. At κ = 0 the solution is `L = ℓ_ref` by construction.
pub(crate) fn data_center_reference_fill(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    ell_ref: f64,
) -> Result<f64, BasisError> {
    if !(ell_ref.is_finite() && ell_ref > 0.0) {
        crate::bail_invalid_basis!(
            "constant-curvature reference fill needs a positive finite ℓ_ref; got {ell_ref}"
        );
    }
    let mut sum = 0.0_f64;
    let mut cnt = 0.0_f64;
    for xi in data.outer_iter() {
        for cj in centers.outer_iter() {
            let mut s = 0.0_f64;
            for k in 0..centers.ncols() {
                let dlt = xi[k] - cj[k];
                s += dlt * dlt;
            }
            let d0 = 2.0 * s.sqrt(); // κ = 0 chart gauge d₀ = 2‖Δ‖
            sum += (-d0 / ell_ref).exp();
            cnt += 1.0;
        }
    }
    if cnt <= 0.0 {
        crate::bail_invalid_basis!(
            "constant-curvature reference fill needs at least one data row and one center"
        );
    }
    Ok(sum / cnt)
}

/// The mean-kernel-entry "fill" `g(L,κ) = (1/N) Σᵢⱼ exp(−d_κ(xᵢ,cⱼ)/L)` together
/// with the five partials needed by the implicit-function jet:
/// `(g, g_L, g_κ, g_LL, g_κκ, g_Lκ)`.
///
/// With `k = exp(−d/L)` and the per-pair geodesic jet `(d, d', d'')` (exact via
/// [`distance_kappa_jet`]):
///
/// ```text
///   ∂k/∂L = k·d/L²,                  ∂k/∂κ = −k·d'/L
///   g_LL  = (1/N)Σ k·d·(d − 2L)/L⁴
///   g_κκ  = (1/N)Σ k·((d')²/L − d'')/L
///   g_Lκ  = (1/N)Σ k·d'·(L − d)/L³
/// ```
///
/// (each obtained by differentiating `∂k/∂L` / `∂k/∂κ` once more). `g` and every
/// partial are smooth through κ = 0 because the distance jet is entire there.
pub(crate) fn data_center_fill_partials(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    kappa: f64,
    l: f64,
) -> Result<(f64, f64, f64, f64, f64, f64), BasisError> {
    if !(l.is_finite() && l > 0.0) {
        crate::bail_invalid_basis!(
            "constant-curvature fill partials need a positive finite length; got {l}"
        );
    }
    let manifold = ConstantCurvature::new(centers.ncols(), kappa);
    let l2 = l * l;
    let l3 = l2 * l;
    let l4 = l2 * l2;
    let mut g = 0.0_f64;
    let mut g_l = 0.0_f64;
    let mut g_k = 0.0_f64;
    let mut g_ll = 0.0_f64;
    let mut g_kk = 0.0_f64;
    let mut g_lk = 0.0_f64;
    let mut cnt = 0.0_f64;
    for xi in data.outer_iter() {
        for cj in centers.outer_iter() {
            let (d, d1, d2) = distance_kappa_jet(&manifold, xi, cj).map_err(|e| {
                BasisError::InvalidInput(format!(
                    "constant-curvature data→center fill κ-jet failed: {e}"
                ))
            })?;
            let k = (-d / l).exp();
            g += k;
            g_l += k * d / l2;
            g_k += -k * d1 / l;
            g_ll += k * d * (d - 2.0 * l) / l4;
            g_kk += k * ((d1 * d1) / l - d2) / l;
            g_lk += k * d1 * (l - d) / l3;
            cnt += 1.0;
        }
    }
    if cnt <= 0.0 {
        crate::bail_invalid_basis!(
            "constant-curvature fill partials need at least one data row and one center"
        );
    }
    Ok((
        g / cnt,
        g_l / cnt,
        g_k / cnt,
        g_ll / cnt,
        g_kk / cnt,
        g_lk / cnt,
    ))
}

/// Effective kernel length `L(κ)` and its EXACT κ-jet `(L, L′, L″)`.
///
/// THE κ-IDENTIFICATION FIX (#944). A κ-FROZEN length makes the geodesic-
/// exponential kernel's *resolution* drift with κ: spherical (κ>0) geometries
/// compress geodesic distances, narrowing the kernel relative to the data and
/// inflating the basis's effective flexibility, so REML buys a lower deviance by
/// cranking κ up — κ rails to the chart bound for every truth (the #944/#1059
/// symptom). The earlier #1059 fix normalized by the mean data→center geodesic
/// distance `s_dc(κ)`; but holding the mean DISTANCE fixed does NOT hold the
/// kernel's flexibility fixed — the effective degrees of freedom still drift
/// ~30% across the bracket (verified), so the deviance stayed monotone in κ.
///
/// We instead hold the kernel's "fill" — the mean realized kernel entry
/// `g(L,κ) = (1/N) Σᵢⱼ exp(−d_κ(xᵢ,cⱼ)/L)` — κ-INVARIANT, which pins the
/// realized design's effective degrees of freedom (the EDF is flat to <0.5% in κ
/// under this rule, verified numerically). `L(κ)` is the implicit solution of
///
/// ```text
///   g(L(κ), κ) = fill⋆,   fill⋆ = g(ℓ_ref, 0)   (the κ=0 reference fill)
/// ```
///
/// so changing κ moves ONLY the distance-matrix SHAPE (the genuine curvature
/// signal), giving `V_p(κ)` an interior minimum at the data-generating κ for
/// curved truth. At κ = 0 the solution is `L = ℓ_ref` exactly.
///
/// The jet is EXACT via the implicit-function theorem. Differentiating
/// `g(L(κ),κ) ≡ fill⋆` once gives `g_L·L′ + g_κ = 0`, and once more gives
/// `g_LL·(L′)² + 2 g_Lκ·L′ + g_κκ + g_L·L″ = 0`:
///
/// ```text
///   L′  = −g_κ / g_L
///   L″  = −( g_LL·(L′)² + 2 g_Lκ·L′ + g_κκ ) / g_L .
/// ```
///
/// The partials come from [`data_center_fill_partials`] (exact, riding
/// `distance_kappa_jet`); the returned jet feeds `constant_curvature_kernel_
/// kappa_jets_scaled` through the quotient `q = d/L` chain rule.
///
/// Public scalar view of the κ-invariant effective kernel length `L(κ)` that the
/// realized constant-curvature design/penalty are built at (the #944 fill-
/// invariance fix). The forward build evaluates the geodesic-exponential kernel
/// at this `L(κ)`, NOT at the κ = 0 reference length `ell_ref`, so any external
/// consumer reconstructing `K(·)` to compare against the realized design must
/// use this length. Equals `ell_ref` exactly at κ = 0.
pub fn constant_curvature_effective_length(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    ell_ref: f64,
    kappa: f64,
) -> Result<f64, BasisError> {
    Ok(constant_curvature_effective_length_jet(data, centers, ell_ref, kappa)?.0)
}

pub(crate) fn constant_curvature_effective_length_jet(
    data: ArrayView2<'_, f64>,
    centers: ArrayView2<'_, f64>,
    ell_ref: f64,
    kappa: f64,
) -> Result<(f64, f64, f64), BasisError> {
    let fill_star = data_center_reference_fill(data, centers, ell_ref)?;
    // Newton solve g(L, κ) = fill⋆ for L, warm-started at ℓ_ref (the exact root
    // at κ = 0). g is strictly increasing in L (g_L > 0: larger L ⇒ each entry
    // closer to 1), so Newton from ℓ_ref converges monotonically.
    let mut l = ell_ref;
    const NEWTON_MAX_ITER: usize = 100;
    const NEWTON_REL_TOL: f64 = 1.0e-13;
    let mut converged = false;
    for _ in 0..NEWTON_MAX_ITER {
        let (g, g_l, ..) = data_center_fill_partials(data, centers, kappa, l)?;
        if !(g_l.is_finite() && g_l > 0.0) {
            crate::bail_invalid_basis!(
                "constant-curvature effective length: non-positive fill slope g_L = {g_l} \
                 (degenerate data/centers at κ = {kappa})"
            );
        }
        let step = (g - fill_star) / g_l;
        l -= step;
        if !(l.is_finite() && l > 0.0) {
            crate::bail_invalid_basis!(
                "constant-curvature effective length: Newton left the positive axis (L = {l}) \
                 solving the fill target at κ = {kappa}"
            );
        }
        if step.abs() <= NEWTON_REL_TOL * l {
            converged = true;
            break;
        }
    }
    if !converged {
        crate::bail_invalid_basis!(
            "constant-curvature effective length: fill-target Newton did not converge at κ = {kappa}"
        );
    }
    // Exact implicit-function-theorem jet at the converged root.
    let (_, g_l, g_k, g_ll, g_kk, g_lk) = data_center_fill_partials(data, centers, kappa, l)?;
    let l1 = -g_k / g_l;
    let l2 = -(g_ll * l1 * l1 + 2.0 * g_lk * l1 + g_kk) / g_l;
    Ok((l, l1, l2))
}

/// Build the constant-curvature reproducing-kernel smooth: realized design
/// `K_κ(data, centers)·z`, RKHS penalty `zᵀ K_κ(centers, centers) z`, and the
/// replayable [`BasisMetadata::ConstantCurvature`]. Structure mirrors the
/// Wahba S² builder (`build_spherical_spline_basis`); geometry comes from
/// `ConstantCurvature` at the spec's fixed κ.
pub fn build_constant_curvature_basis(
    data: ArrayView2<'_, f64>,
    spec: &ConstantCurvatureBasisSpec,
) -> Result<BasisBuildResult, BasisError> {
    if data.ncols() == 0 {
        crate::bail_invalid_basis!("constant-curvature smooth needs at least one feature column");
    }
    if !spec.kappa.is_finite() {
        crate::bail_invalid_basis!("constant-curvature smooth needs a finite kappa");
    }
    validate_chart_points(data, spec.kappa, "data")?;
    let centers = select_centers_by_strategy(data, &spec.center_strategy)?;
    if centers.nrows() < 2 {
        return Err(BasisError::InsufficientColumnsForConstraint {
            found: centers.nrows(),
        });
    }
    validate_chart_points(centers.view(), spec.kappa, "centers")?;
    // ℓ_ref is the κ = 0 reference length (auto = mean chart spacing, or the
    // user/frozen value); the kernel uses the κ-invariant effective length
    // L(κ) = ℓ_ref·s(κ)/s₀ so changing κ moves the geometry, not the kernel
    // resolution (the #1059 curvature-identification fix). At κ = 0, L = ℓ_ref.
    let length_scale = realized_constant_curvature_length_scale(centers.view(), spec.length_scale)?;
    // DESIGN effective length L(κ): solved against the DATA→center fill so the
    // realized design's effective DOF stays κ-invariant (#944/#1059). The design
    // X = K(data, centers)·z is built at this L.
    let (ell_eff, _, _) =
        constant_curvature_effective_length_jet(data, centers.view(), length_scale, spec.kappa)?;
    // PENALTY effective length L_S(κ): solved against the CENTER→center fill so
    // the penalty Gram S = zᵀK(centers,centers)z has a κ-INVARIANT resolution
    // (#1464). The data→center fill that pins L(κ) does NOT pin the center→center
    // penalty spectrum, so with the single shared L the penalty pseudo-determinant
    // logdet|S|₊ drifts freely with κ: as κ grows positive the geodesic kernel
    // collapses toward the constant, the center→center Gram eigenvalues bunch /
    // drop below the rank tolerance, logdet|S|₊ falls, and the REML Occam term
    // −½·logdet|S|₊ DECREASES — rewarding the +κ collapsed-kernel corner and
    // railing κ̂ to the +chart bound for any curved data (the headline #1464
    // sign-blindness: hyperbolic truth recovered as spherical, V_p(κ) monotone in
    // κ with no interior optimum). Building the penalty at L_S(κ) holds the
    // penalty eigenvalue SHAPE (hence logdet|S|₊ and its rank) κ-comparable, so
    // the Occam term stops rewarding the collapse and V_p regains an interior
    // minimum near the data-generating κ. At κ = 0, L_S = ℓ_ref = L, so the κ = 0
    // build is byte-identical.
    let (ell_eff_penalty, _, _) = constant_curvature_effective_length_jet(
        centers.view(),
        centers.view(),
        length_scale,
        spec.kappa,
    )?;
    let raw_penalty = constant_curvature_kernel_matrix(
        centers.view(),
        centers.view(),
        spec.kappa,
        ell_eff_penalty,
    )?;
    // Realized-design constraint transform: uniform coefficient sum-to-zero at
    // fit time; the frozen composed `z · z_parametric` at predict time (#532
    // pattern — see ConstantCurvatureIdentifiability).
    let z = match &spec.identifiability {
        ConstantCurvatureIdentifiability::FrozenTransform { transform } => {
            if transform.nrows() != centers.nrows() {
                crate::bail_dim_basis!(
                    "frozen constant-curvature identifiability transform mismatch: {} centers but transform has {} rows",
                    centers.nrows(),
                    transform.nrows()
                );
            }
            transform.clone()
        }
        ConstantCurvatureIdentifiability::CenterSumToZero => {
            let weights = Array1::<f64>::ones(centers.nrows());
            weighted_coefficient_sum_to_zero_transform(weights.view())?
        }
    };
    let gauge = gam_problem::Gauge::from_block_transforms(&[z.clone()]);
    let penalty = gauge.restrict_penalty(&raw_penalty);
    let raw_design = constant_curvature_kernel_matrix(data, centers.view(), spec.kappa, ell_eff)?;
    let design = gam_linalg::matrix::DesignMatrix::Dense(
        gam_linalg::matrix::DenseDesignMatrix::from(gauge.restrict_design(&raw_design)),
    );
    // Keep the RKHS penalty RAW (the symmetric kernel Gram zᵀKz) with
    // normalization_scale = 1, rather than Frobenius-normalizing it. The Gram's
    // eigenvalues ARE the physical RKHS roughness energies of each coefficient
    // direction: the smoothest functions (the low-degree / degree-1 signal) sit
    // in the genuinely tiny-eigenvalue directions, while wiggly functions sit in
    // the large ones — a spread of many orders of magnitude. Frobenius-
    // normalizing divides the whole operator by ‖·‖_F (dominated by the large
    // wiggly eigenvalues), which compresses that spread and inflates the
    // smallest eigenvalues relative to their natural scale. REML's scale-
    // sensitive λ heuristics then drive a single λ high enough to suppress the
    // wiggly directions and, because the smooth directions are no longer
    // proportionally tiny, over-shrink the recoverable low-degree signal
    // (planted degree-1 sphere harmonic recovered at only R²≈0.84). Keeping the
    // raw physical operator (scale = 1, matching the sphere-harmonic Laplace-
    // Beltrami penalty) lets REML act on true roughness, leaving the smooth
    // signal essentially unpenalized while still shrinking the wiggly tail —
    // raising recovery toward the unconstrained RKHS ceiling. The penalty stays
    // exactly proportional to zᵀKz, so the constrained-kernel-Gram contract is
    // unchanged.
    let penalty_sym = (&penalty + &penalty.t()) * 0.5;
    let mut candidates = vec![PenaltyCandidate {
        matrix: penalty_sym,
        nullspace_dim_hint: 0,
        source: PenaltySource::Primary,
        normalization_scale: 1.0,
        kronecker_factors: None,
        op: None,
    }];
    if spec.double_penalty {
        // #1531: identity ridge is CORRECT here, NOT the nullspace-shrinkage ridge
        // the sibling bases (sphere_basis / matern_kernel / duchon_thinplate) use.
        // The Marra & Wood double penalty shrinks the NULL SPACE of the primary
        // penalty so REML can drive an unsupported term to EDF→0. But the primary
        // here is the RKHS kernel Gram zᵀKz, which is strictly PD / full-rank on
        // distinct centers (see the `double_penalty: false` default note above): it
        // has NO null space. `build_nullspace_shrinkage_penalty(&primary)` returns
        // `Ok(None)` for a full-rank input, so matching the sibling pattern would
        // make an explicit `double_penalty = true` a silent no-op. The full identity
        // is the only second shrinkage coordinate that is actually selectable on a
        // null-space-free primary, so it is what an explicit double penalty must use.
        // The regression test `constant_curvature_gram_is_full_rank_so_identity_is_the_only_double_penalty`
        // locks the full-rank fact that justifies this; if a future basis change
        // gives the Gram a genuine null space, that test fails and this branch must
        // be revisited (switch to `build_nullspace_shrinkage_penalty`).
        let ridge = Array2::<f64>::eye(design.ncols());
        let (ridge_norm, c_ridge) = normalize_penalty(&ridge);
        candidates.push(PenaltyCandidate {
            matrix: ridge_norm,
            nullspace_dim_hint: 0,
            source: PenaltySource::DoublePenaltyNullspace,
            normalization_scale: c_ridge,
            kronecker_factors: None,
            op: None,
        });
    }
    let (penalties, nullspace_dims, penaltyinfo, null_eigenvectors, ops) =
        filter_active_penalty_candidates_with_ops(candidates)?;
    Ok(BasisBuildResult {
        design,
        penalties,
        nullspace_dims,
        penaltyinfo,
        metadata: BasisMetadata::ConstantCurvature {
            centers,
            kappa: spec.kappa,
            length_scale,
            constraint_transform: Some(z),
        },
        kronecker_factored: None,
        ops,
        null_eigenvectors,
        joint_null_rotation: None,
    })
}

/// Closed-form profiled Gaussian-REML negative-log-evidence of a dense design
/// `b` (n×p) against response `y`, with an UNPENALIZED intercept column appended
/// and the symmetric psd RKHS penalty `s` (p×p) profiled over a dense log-λ grid.
/// `min_λ D(λ)` with
///   `D(λ) = (n−Mp)·log(rss/(n−Mp)) + log|HᵀH| − log|λS|₊`,
/// `H = [1|b]ᵀ[1|b] + λ·diag(0,S)`, `Mp = 1 + nullity(S)` (the intercept is in the
/// null space). Self-contained — the same criterion shape the in-crate oracle
/// `profiled_gaussian_reml_deviance` certifies, with the production intercept the
/// full GAM always carries (so it matches what the fit path sees).
fn profiled_reml_with_intercept(b: &Array2<f64>, y: &Array1<f64>, s: &Array2<f64>) -> f64 {
    use gam_linalg::faer_ndarray::FaerEigh;
    let n = b.nrows();
    let p = b.ncols();
    // Augmented design [1 | b] and zero-padded penalty diag(0, S).
    let mut ba = Array2::<f64>::zeros((n, p + 1));
    for i in 0..n {
        ba[(i, 0)] = 1.0;
        for j in 0..p {
            ba[(i, j + 1)] = b[(i, j)];
        }
    }
    let mut sa = Array2::<f64>::zeros((p + 1, p + 1));
    for i in 0..p {
        for j in 0..p {
            sa[(i + 1, j + 1)] = s[(i, j)];
        }
    }
    let pa = p + 1;
    let btb = symmetrize(&ba.t().dot(&ba));
    let bty = ba.t().dot(y);
    let (s_evals, _) = FaerEigh::eigh(&symmetrize(&sa), faer::Side::Lower)
        .expect("κ-fair penalty eigendecomposition");
    let s_max = s_evals.iter().cloned().fold(0.0_f64, f64::max).max(1e-300);
    let s_tol = s_max * 1e-9;
    let r = s_evals.iter().filter(|&&e| e > s_tol).count();
    let m_p = pa - r;
    let dof = (n - m_p) as f64;
    let log_det_s_plus: f64 = s_evals
        .iter()
        .filter(|&&e| e > s_tol)
        .map(|&e| e.ln())
        .sum();
    let mut best = f64::INFINITY;
    for k in -24i32..=24 {
        let lam = (0.5 * f64::from(k)).exp();
        let h = symmetrize(&(&btb + &(sa.mapv(|v| v * lam))));
        let h_ridge = &h + &(Array2::<f64>::eye(pa) * (1e-10 * s_max.max(1.0)));
        let (hv, hq) = FaerEigh::eigh(&symmetrize(&h_ridge), faer::Side::Lower)
            .expect("κ-fair penalized-Hessian eigendecomposition");
        let qty = hq.t().dot(&bty);
        let mut beta = Array1::<f64>::zeros(pa);
        let mut log_det_h = 0.0_f64;
        for i in 0..pa {
            let ev = hv[i].max(1e-300);
            log_det_h += ev.ln();
            let coef = qty[i] / ev;
            for j in 0..pa {
                beta[j] += hq[(j, i)] * coef;
            }
        }
        let resid = y - &ba.dot(&beta);
        let rss = resid.dot(&resid).max(1e-300);
        let log_det_lam_s = (r as f64) * lam.ln() + log_det_s_plus;
        let dev = dof * (rss / dof).ln() + log_det_h - log_det_lam_s;
        if dev < best {
            best = dev;
        }
    }
    best
}

/// #1464: the **κ-fair** sign-resolving score for a constant-curvature smooth at
/// a fixed κ — the production datum the sign-basin scan minimizes to choose the
/// curvature SIGN basin.
///
/// THE DATA-FIT κ-FAIRNESS FIX. The L(κ)/L_S(κ) effective-length reparam already
/// holds the kernel FILL and the penalty Occam term κ-invariant (#944/#1464
/// penalty fix), but the realized profiled-REML DATA-FIT term is still sign-blind:
/// on a generic center-peaked radial signal the +κ chart's geodesic-distance
/// COMPRESSION concentrates the design's singular-value mass into the leading
/// (low-order radial) modes — a uniformly better interpolator of ANY radial peak,
/// regardless of the true curvature sign — so `V_p(κ)` decreases monotonically
/// toward the +chart bound for BOTH spherical and hyperbolic truth (hyperbolic
/// recovered as spherical, κ̂ railed to +0.5/max‖x‖²). Holding the EDF / hat-trace
/// or ‖X‖_F κ-invariant does NOT cure it: the advantage is the per-direction
/// REDISTRIBUTION of approximation power, not its total scale (verified — the EDF
/// is already κ-invariant to <1% under L(κ), yet RSS still falls toward +κ).
///
/// The cure makes the comparison apples-to-apples by SUBTRACTING the design's
/// GENERIC radial-peak-fitting power at this κ. We measure that generic power with
/// a bank of κ-INDEPENDENT reference signals `r_α(i) = exp(−α·‖x_i‖)` — radial in
/// the Euclidean chart coordinate, so carrying NO curvature-sign preference — and
/// score
///
/// ```text
///   V_fair(κ) = V_p(κ; y) − mean_α V_p(κ; r_α) .
/// ```
///
/// The generic +κ interpolation advantage cancels between the two terms (it lifts
/// `V_p(κ; y)` and `V_p(κ; r_α)` by the same amount), leaving only the GENUINE
/// curvature-shape alignment of the actual data `y` with the κ-geometry. The bank
/// (several α widths, averaged) removes the residual sensitivity of any single
/// reference width to the data realization, so `argmin_κ V_fair` lands on the
/// correct SIDE of 0 for both signs (spherical κ̂ > 0, hyperbolic κ̂ < 0) across
/// seeds. The reference correction enters ONLY the sign-basin SELECTION; the
/// realized fit and the magnitude/CI keep using the raw `V_p`, so the κ = 0 build
/// and the final coefficients are untouched.
///
/// Builds the design `X = K_κ(data, centers)·z` at the data→center effective
/// length `L(κ)` and the penalty `S = symm(zᵀK_κ(centers,centers)z)` at the
/// center→center effective length `L_S(κ)`, exactly as
/// [`build_constant_curvature_basis`] (raw RKHS Gram, scale = 1, intercept
/// appended unpenalized), so the criterion the scan minimizes is the production
/// design's own profiled REML.
/// Build the realized constant-curvature profile design `B = K_κ(data,
/// centers)·z` and penalty `S = symm(zᵀK_κ(centers,centers)z)` at the fixed κ in
/// `spec`, EXACTLY as [`build_constant_curvature_basis`] does (same centers, same
/// κ-invariant effective lengths `L(κ)`/`L_S(κ)`, same center-sum-to-zero `z`,
/// raw RKHS Gram penalty). Shared by the honest profiled-REML κ-profile score and
/// the κ-fair sign score so both probe the production design's own criterion.
fn constant_curvature_profile_design_penalty(
    data: ArrayView2<'_, f64>,
    spec: &ConstantCurvatureBasisSpec,
) -> Result<(Array2<f64>, Array2<f64>), BasisError> {
    if data.ncols() == 0 {
        crate::bail_invalid_basis!(
            "constant-curvature profile score needs at least one feature column"
        );
    }
    if !spec.kappa.is_finite() {
        crate::bail_invalid_basis!("constant-curvature profile score needs a finite kappa");
    }
    validate_chart_points(data, spec.kappa, "data")?;
    let centers = select_centers_by_strategy(data, &spec.center_strategy)?;
    if centers.nrows() < 2 {
        return Err(BasisError::InsufficientColumnsForConstraint {
            found: centers.nrows(),
        });
    }
    validate_chart_points(centers.view(), spec.kappa, "centers")?;
    let length_scale = realized_constant_curvature_length_scale(centers.view(), spec.length_scale)?;
    // Design effective length L(κ) (data→center fill) and penalty effective
    // length L_S(κ) (center→center fill) — identical to the value builder.
    let (ell_eff, _, _) =
        constant_curvature_effective_length_jet(data, centers.view(), length_scale, spec.kappa)?;
    let (ell_eff_penalty, _, _) = constant_curvature_effective_length_jet(
        centers.view(),
        centers.view(),
        length_scale,
        spec.kappa,
    )?;
    let weights = Array1::<f64>::ones(centers.nrows());
    let z = weighted_coefficient_sum_to_zero_transform(weights.view())?;
    let gauge = gam_problem::Gauge::from_block_transforms(&[z]);
    let raw_design = constant_curvature_kernel_matrix(data, centers.view(), spec.kappa, ell_eff)?;
    let b = gauge.restrict_design(&raw_design);
    let raw_penalty = constant_curvature_kernel_matrix(
        centers.view(),
        centers.view(),
        spec.kappa,
        ell_eff_penalty,
    )?;
    let s = symmetrize(&gauge.restrict_penalty(&raw_penalty));
    Ok((b, s))
}

/// #1464: the **honest** fixed-κ profiled-REML score `V_p(κ)` for a
/// constant-curvature smooth — the textbook Gaussian profiled-REML
/// negative-log-evidence of the realized design `B = K_κ(data,centers)·z` against
/// `y`, with the unpenalized intercept appended and the raw RKHS Gram penalty `S`
/// profiled over λ (`profiled_reml_with_intercept`). This is the criterion whose
/// argmin over the chart-bounded κ window IDENTIFIES the curvature, and the one
/// `curvature_inference_forspec` walks for the magnitude CI and the κ = 0 flatness
/// LR test.
///
/// Why this, not the production full-fit `reml_score`: the production REML's
/// λ-selection heavily SMOOTHS this RKHS kernel (deviance ≫ near-interpolation
/// RSS), and under heavy smoothing the +κ chart's geodesic-distance COMPRESSION
/// makes the collapsed kernel fit the over-smoothed target better for ANY data —
/// so the production `reml_score` is monotone toward the +chart bound regardless
/// of the true sign (the headline #1464 sign-blindness, and an over-smoothing of
/// the curvature criterion specifically). The honest profiled REML keeps the
/// curvature-shape signal in the data fit (the κ that matches the geodesic
/// geometry minimizes RSS), so its argmin lands on the correct sign, and because
/// it is a proper profiled-REML deviance the LR/CI thresholds stay χ²-calibrated.
/// On genuinely flat (constant-mean) data the criterion is ~flat in κ (the
/// intercept absorbs the mean at every κ), giving the flatness test correct size.
pub fn constant_curvature_honest_profiled_reml_score(
    data: ArrayView2<'_, f64>,
    y: ArrayView1<'_, f64>,
    spec: &ConstantCurvatureBasisSpec,
) -> Result<f64, BasisError> {
    if y.len() != data.nrows() {
        crate::bail_dim_basis!(
            "constant-curvature profiled-REML score: y has {} rows but data has {}",
            y.len(),
            data.nrows()
        );
    }
    let (b, s) = constant_curvature_profile_design_penalty(data, spec)?;
    let v = profiled_reml_with_intercept(&b, &y.to_owned(), &s);
    if !v.is_finite() {
        crate::bail_invalid_basis!(
            "constant-curvature honest profiled-REML score at κ={} is non-finite",
            spec.kappa
        );
    }
    Ok(v)
}

pub fn constant_curvature_kappa_fair_sign_score(
    data: ArrayView2<'_, f64>,
    y: ArrayView1<'_, f64>,
    spec: &ConstantCurvatureBasisSpec,
) -> Result<f64, BasisError> {
    if y.len() != data.nrows() {
        crate::bail_dim_basis!(
            "constant-curvature κ-fair score: y has {} rows but data has {}",
            y.len(),
            data.nrows()
        );
    }
    let (b, s) = constant_curvature_profile_design_penalty(data, spec)?;

    let v_y = profiled_reml_with_intercept(&b, &y.to_owned(), &s);

    // CURVATURE-NEUTRAL, ENERGY-MATCHED reference: a COARSE radial profile of the
    // data. The +κ chart compresses geodesic distances so the geodesic-
    // exponential kernel is a uniformly better interpolator of any radial signal
    // regardless of the true curvature sign; this generic interpolation advantage
    // lifts `V_p(κ)` monotonically toward +κ and must be cancelled so only the
    // genuine curvature-shape signal drives the sign. The reference that cancels
    // it is one carrying the same gross radial energy as the data but no fine
    // κ-geometry: `y_ref(i)` = mean of `y` over a SMALL number of Euclidean-radius
    // bins. The bin count is deliberately coarse: enough bins to track the data's
    // radial trend (so the +κ tilt cancels and a genuinely FLAT truth scores
    // ~symmetrically in κ — its response is already a function of `‖x‖` alone, so
    // `y_ref ≈ y` and the criterion refuses to prefer a sign), but few enough that
    // the profile CANNOT reproduce the data-generating `d_κ⋆` curvature shape — so
    // for a curved truth the residual `V_p(κ;y) − V_p(κ;y_ref)` still wells toward
    // the data-generating sign. A fine profile would absorb the curvature signal
    // (the radial truth is nearly a function of `‖x‖`); a fixed exp(−α‖x‖) bank
    // does not match the data's radial energy and leaves a strong residual −κ tilt.
    // The coarse matched profile shrinks that tilt to a small noise-overfit
    // residual (the geodesic kernel overfits noise slightly more in the hyperbolic
    // chart), so on a CURVED truth the genuine signal dominates and the argmin sign
    // is correct. A residual flat-data tilt remains, so this term alone does NOT
    // fully separate flat (κ ≈ 0) from hyperbolic (κ < 0); the caller adopts the
    // argmin only for the negative (hyperbolic) sign and leaves the spherical and
    // (residual-tilt) flat cases to the joint solver / κ ≈ 0 path.
    let radii: Array1<f64> = data.outer_iter().map(|row| row.dot(&row).sqrt()).collect();
    const N_RADIAL_BINS: usize = 10;
    let r_max = radii.iter().cloned().fold(0.0_f64, f64::max).max(1e-12);
    let bin_of = |r: f64| -> usize {
        (((r / r_max) * N_RADIAL_BINS as f64) as usize).min(N_RADIAL_BINS - 1)
    };
    let mut bin_sum = [0.0_f64; N_RADIAL_BINS];
    let mut bin_cnt = [0.0_f64; N_RADIAL_BINS];
    for (i, &r) in radii.iter().enumerate() {
        let b_idx = bin_of(r);
        bin_sum[b_idx] += y[i];
        bin_cnt[b_idx] += 1.0;
    }
    let bin_mean: Vec<f64> = bin_sum
        .iter()
        .zip(bin_cnt.iter())
        .map(|(&s, &c)| if c > 0.0 { s / c } else { 0.0 })
        .collect();
    let y_ref: Array1<f64> = radii.mapv(|r| bin_mean[bin_of(r)]);

    let v_ref = profiled_reml_with_intercept(&b, &y_ref, &s);

    let v_fair = v_y - v_ref;
    if !v_fair.is_finite() {
        crate::bail_invalid_basis!(
            "constant-curvature κ-fair score at κ={} is non-finite (V_y={v_y}, V_ref={v_ref})",
            spec.kappa
        );
    }
    Ok(v_fair)
}

/// Symmetrize `M` in place to `(M + Mᵀ)/2` (the realized penalty is built from
/// the symmetric kernel Gram; the κ-derivative blocks inherit the same exact
/// symmetrization the value path applies before normalization).
pub(crate) fn symmetrize(m: &Array2<f64>) -> Array2<f64> {
    gam_linalg::matrix::symmetrize(m)
}

/// Map a single primary-penalty κ-derivative onto the active penalty list by
/// source — the constant-curvature analogue of the Matérn double-penalty
/// derivative selector. The RKHS Gram is the only κ-moving penalty; the
/// double-penalty ridge `I` is κ-independent, so its derivative is exactly
/// zero. Any other source would mean the basis grew a penalty whose κ-movement
/// is unaccounted for, so we refuse loudly rather than silently drop a term.
pub(crate) fn active_constant_curvature_penalty_derivatives(
    penaltyinfo: &[PenaltyInfo],
    primary_derivative: &Array2<f64>,
) -> Result<Vec<Array2<f64>>, BasisError> {
    penaltyinfo
        .iter()
        .filter(|info| info.active)
        .map(|info| match &info.source {
            PenaltySource::Primary => Ok(primary_derivative.clone()),
            PenaltySource::DoublePenaltyNullspace => {
                Ok(Array2::<f64>::zeros(primary_derivative.raw_dim()))
            }
            other => Err(BasisError::InvalidInput(format!(
                "unexpected constant-curvature penalty source in κ-derivative path: {other:?}"
            ))),
        })
        .collect()
}

/// κ-derivative bundle for the constant-curvature smooth — the ψ-channel hook
/// that lets κ join the outer LAML/REML optimization as one signed,
/// design-moving coordinate (#944 stage 3 final wiring).
///
/// The outer optimizer's ψ-coordinate here is the **raw, signed curvature κ
/// itself** (NOT `log κ` as for the Matérn kernel scale): κ = 0 must be a
/// reachable interior point of the `S^d ← ℝ^d → H^d` family, which `log κ`
/// cannot represent. So this returns `∂·/∂κ` and `∂²·/∂κ²` directly, and the
/// outer assembly treats the coordinate as `ψ = κ` with `∂/∂ψ = ∂/∂κ`.
///
/// Every κ-fixed piece (centers, length scale ℓ, the center-space constraint
/// transform `z`) is held constant exactly as documented in the module
/// κ-contract, so the design moves with κ only through the geodesic-exponential
/// kernel and:
///
/// ```text
///   X = K(data, centers)·z          ⇒  ∂X/∂κ  = (∂K_dc/∂κ)·z,
///                                       ∂²X/∂κ² = (∂²K_dc/∂κ²)·z
///   S_raw = symm(zᵀ K(centers,centers) z)
///                                   ⇒  ∂S_raw/∂κ  = symm(zᵀ(∂K_cc/∂κ)z), etc.
/// ```
///
/// and the Frobenius penalty normalization is differentiated with the exact
/// quotient rules through the shared `normalize_penaltywith_psi_derivatives`
/// seam — identical to how the Matérn operator penalties propagate their
/// normalization. The double-penalty ridge `I` is κ-independent (zero
/// derivative).
///
/// Mirrors [`build_constant_curvature_basis`] so the realized design and
/// penalties whose κ-derivatives this returns are byte-for-byte the same
/// construction the value path produced (same centers, same ℓ, same `z`).
pub fn build_constant_curvature_basis_kappa_derivatives(
    data: ArrayView2<'_, f64>,
    spec: &ConstantCurvatureBasisSpec,
) -> Result<BasisPsiDerivativeBundle, BasisError> {
    if data.ncols() == 0 {
        crate::bail_invalid_basis!("constant-curvature smooth needs at least one feature column");
    }
    if !spec.kappa.is_finite() {
        crate::bail_invalid_basis!("constant-curvature smooth needs a finite kappa");
    }
    validate_chart_points(data, spec.kappa, "data")?;
    let centers = select_centers_by_strategy(data, &spec.center_strategy)?;
    if centers.nrows() < 2 {
        return Err(BasisError::InsufficientColumnsForConstraint {
            found: centers.nrows(),
        });
    }
    validate_chart_points(centers.view(), spec.kappa, "centers")?;
    let length_scale = realized_constant_curvature_length_scale(centers.view(), spec.length_scale)?;

    // κ-fixed constraint transform `z`, resolved exactly as the value builder.
    let z = match &spec.identifiability {
        ConstantCurvatureIdentifiability::FrozenTransform { transform } => {
            if transform.nrows() != centers.nrows() {
                crate::bail_dim_basis!(
                    "frozen constant-curvature identifiability transform mismatch: {} centers but transform has {} rows",
                    centers.nrows(),
                    transform.nrows()
                );
            }
            transform.clone()
        }
        ConstantCurvatureIdentifiability::CenterSumToZero => {
            let weights = Array1::<f64>::ones(centers.nrows());
            weighted_coefficient_sum_to_zero_transform(weights.view())?
        }
    };
    let gauge = gam_problem::Gauge::from_block_transforms(&[z.clone()]);

    // Effective-length κ-jet L(κ) = ℓ_ref·s(κ)/s₀ (the κ-invariant-resolution
    // fix). The kernel exponent is q = d/L with BOTH d and L moving in κ, so the
    // kernel κ-jets carry the full quotient chain rule — see
    // `constant_curvature_kernel_kappa_jets_scaled`.
    let l_jet =
        constant_curvature_effective_length_jet(data, centers.view(), length_scale, spec.kappa)?;

    // Design κ-jets: X = K(data, centers)·z, so the κ-derivatives are the
    // kernel κ-jets right-multiplied by the κ-fixed `z`.
    let (_k_dc, dk_dc, dkk_dc) =
        constant_curvature_kernel_kappa_jets_scaled(data, centers.view(), spec.kappa, l_jet)?;
    let design_first = gauge.restrict_design(&dk_dc);
    let design_second_diag = gauge.restrict_design(&dkk_dc);

    // Penalty κ-jets: S = symm(zᵀ K(centers,centers) z), kept RAW (no Frobenius
    // normalization) exactly as the value builder now does (scale = 1). The raw
    // symmetric penalty's κ-derivatives are therefore the symmetrized restricted
    // kernel κ-jets DIRECTLY — there is no normalization quotient rule to
    // propagate, which also removes the κ-dependent ‖S‖_F factor that the
    // normalized form had to differentiate.
    //
    // The penalty kernel is built at the CENTER→center effective-length jet
    // L_S(κ) (#1464), NOT the design's data→center L(κ), so the analytic κ-gradient
    // of logdet|S|₊ stays EXACT for the penalty-resolution-invariant value build
    // above. q_S = d/L_S with both d and L_S moving in κ, so the quotient chain
    // rule inside `constant_curvature_kernel_kappa_jets_scaled` carries the L_S jet.
    let l_jet_penalty = constant_curvature_effective_length_jet(
        centers.view(),
        centers.view(),
        length_scale,
        spec.kappa,
    )?;
    let (_k_cc, dk_cc, dkk_cc) = constant_curvature_kernel_kappa_jets_scaled(
        centers.view(),
        centers.view(),
        spec.kappa,
        l_jet_penalty,
    )?;
    let s_first = symmetrize(&gauge.restrict_penalty(&dk_cc));
    let s_second = symmetrize(&gauge.restrict_penalty(&dkk_cc));

    // Align the single primary-penalty derivative with the realized active
    // penalty list (primary always; ridge only when double_penalty, and
    // κ-independent). Rebuild the realized basis once to read `penaltyinfo`.
    let base = build_constant_curvature_basis(data, spec)?;
    let penalties_derivative =
        active_constant_curvature_penalty_derivatives(&base.penaltyinfo, &s_first)?;
    let penaltiessecond_derivative =
        active_constant_curvature_penalty_derivatives(&base.penaltyinfo, &s_second)?;

    Ok(BasisPsiDerivativeBundle {
        first: BasisPsiDerivativeResult {
            design_derivative: design_first,
            penalties_derivative,
            implicit_operator: None,
        },
        second: BasisPsiSecondDerivativeResult {
            designsecond_derivative: design_second_diag,
            penaltiessecond_derivative,
            implicit_operator: None,
        },
        implicit_operator: None,
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use gam_linalg::faer_ndarray::FaerEigh;

    // Diagnostic (#1059 follow-up): show that a κ-FROZEN chart-scale length
    // makes the geodesic-exponential kernel COLLAPSE toward the constant
    // function as κ grows positive (sphere distances compress), which is the
    // degenerate optimum the REML criterion rails to. For a fixed center set we
    // print, per κ, the median geodesic distance and the kernel "spread"
    // 1 − mean(offdiag K). A collapsing kernel ⇒ spread → 0 as κ ↑.
    #[test]
    pub(crate) fn kernel_spread_collapses_with_kappa_at_frozen_length_scale() {
        // 8 centers in a disk of radius 0.45 (inside every κ∈[-2,2] chart).
        let centers = ndarray::array![
            [0.10, 0.05],
            [-0.20, 0.15],
            [0.30, -0.10],
            [-0.05, -0.25],
            [0.22, 0.20],
            [-0.30, -0.05],
            [0.05, 0.30],
            [-0.15, 0.10],
        ];
        // Frozen ℓ: the κ=0 chart-scale auto rule (median 2‖Δ‖).
        let ell_frozen = realized_constant_curvature_length_scale(centers.view(), 0.0).unwrap();

        let spread = |kappa: f64, ell: f64| -> f64 {
            let k = constant_curvature_kernel_matrix(centers.view(), centers.view(), kappa, ell)
                .unwrap();
            let m = k.nrows();
            let mut s = 0.0;
            let mut cnt = 0.0;
            for i in 0..m {
                for j in 0..m {
                    if i != j {
                        s += k[(i, j)];
                        cnt += 1.0;
                    }
                }
            }
            1.0 - s / cnt
        };

        let s_neg = spread(-2.0, ell_frozen);
        let s_zero = spread(0.0, ell_frozen);
        let s_pos = spread(2.0, ell_frozen);
        eprintln!(
            "[κ-collapse] frozen ℓ={ell_frozen:.4}: spread κ=-2 {s_neg:.4} | κ=0 {s_zero:.4} | κ=+2 {s_pos:.4}"
        );

        // The degenerate signature: positive κ collapses the kernel toward the
        // constant (spread shrinks), so the criterion can buy cheap EDF by
        // pushing κ up — this is the unidentifiability we are fixing.
        assert!(
            s_pos < s_zero && s_zero < s_neg,
            "expected kernel spread to shrink with κ at frozen ℓ: κ=-2 {s_neg} κ=0 {s_zero} κ=+2 {s_pos}"
        );

        // Decompose the κ-monotone REML Occam term. The realized penalty is the
        // Frobenius-normalized centered Gram S~ = S_raw/‖S_raw‖_F with
        // S_raw = symm(zᵀ K z); the REML evidence carries +½ log|S~|_+ over its
        // range. Print log det₊(S~) per κ to see whether the penalty-normalization
        // Occam term (not just the modest kernel-spread shift) is what rails κ.
        let weights = Array1::<f64>::ones(centers.nrows());
        let z = weighted_coefficient_sum_to_zero_transform(weights.view()).unwrap();
        let logdet_norm_penalty = |kappa: f64, ell: f64| -> f64 {
            let k = constant_curvature_kernel_matrix(centers.view(), centers.view(), kappa, ell)
                .unwrap();
            let s_raw = symmetrize(&z.t().dot(&k).dot(&z));
            let (s_norm, _c) = normalize_penalty(&s_raw);
            let sym = symmetrize(&s_norm);
            let (evals, _v) = FaerEigh::eigh(&sym, faer::Side::Lower).unwrap();
            let max = evals.iter().cloned().fold(0.0_f64, f64::max);
            let tol = max * 1e-9;
            evals
                .iter()
                .filter(|&&e| e > tol)
                .map(|&e| e.ln())
                .sum::<f64>()
        };
        let l_neg = logdet_norm_penalty(-2.0, ell_frozen);
        let l_zero = logdet_norm_penalty(0.0, ell_frozen);
        let l_pos = logdet_norm_penalty(2.0, ell_frozen);
        eprintln!(
            "[κ-collapse] log|S~|_+ (frozen ℓ): κ=-2 {l_neg:.4} | κ=0 {l_zero:.4} | κ=+2 {l_pos:.4}"
        );

        // GEODESIC-SCALED ℓ removes the κ-dependence of the kernel resolution:
        // set ℓ(κ) = median geodesic distance d_κ among centers. Then the spread
        // should be ~κ-invariant. Print the geodesic-ℓ spread per κ.
        let geo_median_ell = |kappa: f64| -> f64 {
            let m = centers.nrows();
            let manifold = ConstantCurvature::new(centers.ncols(), kappa);
            let mut dists = Vec::with_capacity(m * (m - 1) / 2);
            for i in 0..m {
                for j in (i + 1)..m {
                    dists.push(manifold.distance(centers.row(i), centers.row(j)).unwrap());
                }
            }
            dists.sort_by(|a, b| a.partial_cmp(b).unwrap());
            dists[dists.len() / 2]
        };
        let gs_neg = spread(-2.0, geo_median_ell(-2.0));
        let gs_zero = spread(0.0, geo_median_ell(0.0));
        let gs_pos = spread(2.0, geo_median_ell(2.0));
        let gl_neg = logdet_norm_penalty(-2.0, geo_median_ell(-2.0));
        let gl_zero = logdet_norm_penalty(0.0, geo_median_ell(0.0));
        let gl_pos = logdet_norm_penalty(2.0, geo_median_ell(2.0));
        eprintln!(
            "[κ-collapse] geodesic ℓ: spread κ=-2 {gs_neg:.4} | κ=0 {gs_zero:.4} | κ=+2 {gs_pos:.4}"
        );
        eprintln!(
            "[κ-collapse] geodesic ℓ: log|S~|_+ κ=-2 {gl_neg:.4} | κ=0 {gl_zero:.4} | κ=+2 {gl_pos:.4}"
        );

        // CANDIDATE FIX: freeze the Frobenius normalization constant at κ=0 so
        // the REML Occam term log|S_λ|_+ carries only the GENUINE roughness
        // spectrum log|S_raw(κ)|_+ (minus a κ-independent constant), not the
        // spurious −r·log‖S_raw(κ)‖_F leak. Compare:
        //   (a) log|S_raw(κ)|_+        (un-normalized, true roughness Occam term)
        //   (b) log|S_raw(κ)/c₀|_+     (frozen-c₀ normalization at κ=0)
        // Both should be κ-IDENTIFYING (a real interior optimum), not monotone.
        let logdet_raw = |kappa: f64, ell: f64, c0: f64| -> f64 {
            let k = constant_curvature_kernel_matrix(centers.view(), centers.view(), kappa, ell)
                .unwrap();
            let s_raw = symmetrize(&z.t().dot(&k).dot(&z));
            let scaled = s_raw.mapv(|v| v / c0);
            let (evals, _v) = FaerEigh::eigh(&scaled, faer::Side::Lower).unwrap();
            let max = evals.iter().cloned().fold(0.0_f64, f64::max);
            let tol = max * 1e-9;
            evals
                .iter()
                .filter(|&&e| e > tol)
                .map(|&e| e.ln())
                .sum::<f64>()
        };
        // c₀ = ‖S_raw(κ=0)‖_F at frozen ℓ.
        let k0 = constant_curvature_kernel_matrix(centers.view(), centers.view(), 0.0, ell_frozen)
            .unwrap();
        let s_raw0 = symmetrize(&z.t().dot(&k0).dot(&z));
        let c0 = s_raw0.iter().map(|v| v * v).sum::<f64>().sqrt();
        let r_neg = logdet_raw(-2.0, ell_frozen, c0);
        let r_zero = logdet_raw(0.0, ell_frozen, c0);
        let r_pos = logdet_raw(2.0, ell_frozen, c0);
        eprintln!(
            "[κ-collapse] frozen-c₀ log|S_raw/c₀|_+ (frozen ℓ): κ=-2 {r_neg:.4} | κ=0 {r_zero:.4} | κ=+2 {r_pos:.4}"
        );
        // Finer grid to see the shape of the un-normalized roughness Occam term.
        eprint!("[κ-collapse] frozen-c₀ grid:");
        for kk in [-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0] {
            eprint!(" κ={kk}:{:.4}", logdet_raw(kk, ell_frozen, c0));
        }
        eprintln!();
    }

    // ===================================================================
    //  WITNESS ORACLE — κ-identification theorem (#944 / #1059)
    // ===================================================================
    //
    //  THEORY (derived by hand, this session).
    //
    //  The constant-curvature smooth realizes a Gaussian penalized fit whose
    //  ONLY κ-moving pieces are (i) the design X(κ) = K_κ(data, centers)·z and
    //  (ii) the RKHS penalty S_raw(κ) = zᵀ K_κ(centers,centers) z, both built
    //  from the geodesic-exponential kernel exp(−d_κ/L(κ)). REML profiles the
    //  smoothing parameter λ out, giving a 1-D profiled criterion V_p(κ).
    //
    //  Claim 1 (FROBENIUS GAUGE — confound #2 is NOT real). The live penalty is
    //  the Frobenius-normalized S~ = S_raw/c, c = ‖S_raw‖_F, entering REML as
    //  λ·S~ = (λ/c)·S_raw. Reparametrize μ = λ/c. The whole REML objective —
    //  data fit, log|XᵀX + λS~| and the pseudo-logdet +r·log λ + log|S~|_+ —
    //  depends on (λ, c) only through μ, because
    //      log|λS~|_+ = r·log λ + log|S_raw|_+ − r·log c
    //                 = r·log μ + log|S_raw|_+,
    //  and the fit/curvature terms see only μ·S_raw. Hence the diagnostic
    //  `log|S~|_+`-per-κ "Occam leak" −r·log‖S_raw(κ)‖_F is a PURE GAUGE that the
    //  profiled-λ criterion cancels exactly. The κ-railing is therefore NOT a
    //  penalty-normalization artifact; chasing it in `normalize_penalty` is a
    //  dead end. Encoded below: V_p(κ) is invariant under S_raw → α·S_raw.
    //
    //  Claim 2 (IDENTIFICATION — the L(κ) fix is the cure). With a κ-FROZEN
    //  length ℓ the kernel RESOLUTION drifts with κ (positive κ compresses
    //  geodesic distances → narrower bumps → inflated effective DOF), so REML
    //  buys deviance by railing κ to the +chart bound for EVERY truth — V_p is
    //  monotone, κ unidentified. Tying the length to the DATA→center geodesic
    //  scale, L(κ) = ℓ_ref·s_dc(κ)/s₀_dc, holds the typical design entry
    //  d_κ(data,c)/L(κ) κ-invariant in MEAN, so only the distance-matrix SHAPE
    //  (the genuine curvature signal: how data→center distances DISPERSE
    //  relative to their mean as the geometry bends) moves V_p. Then V_p has an
    //  interior minimum whose sign matches sign(κ⋆). Encoded below: argmin of
    //  the profiled REML over a κ-grid lands on the correct SIDE of 0 for both a
    //  hyperbolic (κ⋆<0) and a spherical (κ⋆>0) truth — and FAILS (rails to the
    //  +bound) if the length is frozen instead of L(κ)-scaled.
    //
    //  Profiled Gaussian REML used by the oracle (closed form, ridge-stabilized
    //  generalized eigenbasis): for response y (n), design B = X·(whitened),
    //  penalty S (psd), REML deviance at smoothing λ is
    //     D(λ) = (n−Mp)·log(rss/(n−Mp)) + log|BᵀB+λS| − log|λS|_+ ,
    //  rss = ‖y − B β̂_λ‖², β̂_λ = (BᵀB+λS)⁻¹Bᵀy, Mp = nullity(S). We minimize
    //  D over a dense log-λ grid (the inner profile) and over κ (the outer).

    /// Closed-form profiled Gaussian-REML deviance min over a log-λ grid for a
    /// dense design `b` (n×p) and symmetric psd penalty `s` (p×p). Returns
    /// `min_λ D(λ)`. Self-contained so the oracle does not depend on the outer
    /// solver wiring — it tests the CRITERION SHAPE the wiring profiles.
    pub(crate) fn profiled_gaussian_reml_deviance(
        b: &Array2<f64>,
        y: &Array1<f64>,
        s: &Array2<f64>,
    ) -> f64 {
        let n = b.nrows();
        let p = b.ncols();
        let btb = symmetrize(&b.t().dot(b));
        let bty = b.t().dot(y);
        // Penalty range/null split via eigendecomposition.
        let (s_evals, _sv) = FaerEigh::eigh(&symmetrize(s), faer::Side::Lower).unwrap();
        let s_max = s_evals.iter().cloned().fold(0.0_f64, f64::max).max(1e-300);
        let s_tol = s_max * 1e-9;
        let r = s_evals.iter().filter(|&&e| e > s_tol).count(); // rank
        let m_p = p - r; // nullity
        let dof = (n - m_p) as f64;
        let mut best = f64::INFINITY;
        // log-λ grid spanning the regimes that matter for the profile.
        for k in -24i32..=24 {
            let lam = (0.5 * f64::from(k)).exp();
            let h = &btb + &(s.mapv(|v| v * lam));
            let h = symmetrize(&h);
            // β̂ = H⁻¹ Bᵀy via eigensolve (H spd: BᵀB psd + λS psd, +tiny ridge).
            let h_ridge = &h + &(Array2::<f64>::eye(p) * (1e-10 * s_max.max(1.0)));
            let (hv, hq) = FaerEigh::eigh(&symmetrize(&h_ridge), faer::Side::Lower).unwrap();
            let qty = hq.t().dot(&bty);
            let mut beta = Array1::<f64>::zeros(p);
            let mut log_det_h = 0.0_f64;
            for i in 0..p {
                let ev = hv[i].max(1e-300);
                log_det_h += ev.ln();
                let coef = qty[i] / ev;
                for j in 0..p {
                    beta[j] += hq[(j, i)] * coef;
                }
            }
            let resid = y - &b.dot(&beta);
            let rss = resid.dot(&resid).max(1e-300);
            // log|λS|_+ = r·log λ + log|S|_+ (sum of positive S eigenvalues).
            let log_det_s_plus: f64 = s_evals
                .iter()
                .filter(|&&e| e > s_tol)
                .map(|&e| e.ln())
                .sum();
            let log_det_lam_s = (r as f64) * lam.ln() + log_det_s_plus;
            let dev = dof * (rss / dof).ln() + log_det_h - log_det_lam_s;
            if dev < best {
                best = dev;
            }
        }
        best
    }

    /// Build the κ-scaled (`L(κ)`) constant-curvature design B = K_κ(data,c)·z
    /// and penalty S~ = (zᵀK_κ(c,c)z)/‖·‖_F for a fixed center set, mirroring the
    /// live `build_constant_curvature_basis` math.
    pub(crate) fn oracle_design_and_penalty(
        data: ArrayView2<'_, f64>,
        centers: ArrayView2<'_, f64>,
        ell_ref: f64,
        kappa: f64,
        frozen_length: bool,
    ) -> (Array2<f64>, Array2<f64>) {
        let weights = Array1::<f64>::ones(centers.nrows());
        let z = weighted_coefficient_sum_to_zero_transform(weights.view()).unwrap();
        let ell = if frozen_length {
            ell_ref
        } else {
            constant_curvature_effective_length_jet(data, centers, ell_ref, kappa)
                .unwrap()
                .0
        };
        let k_dc = constant_curvature_kernel_matrix(data, centers, kappa, ell).unwrap();
        let b = k_dc.dot(&z);
        let k_cc = constant_curvature_kernel_matrix(centers, centers, kappa, ell).unwrap();
        let s_raw = symmetrize(&z.t().dot(&k_cc).dot(&z));
        let (s_norm, _c) = normalize_penalty(&s_raw);
        (b, symmetrize(&s_norm))
    }

    /// Claim 1: the profiled REML criterion is INVARIANT under S → α·S (the
    /// Frobenius normalization constant is pure gauge, absorbed by λ). This
    /// proves the `log|S~|_+` "Occam leak" the diagnostic prints is NOT a real
    /// κ-confound — so the κ fix correctly lives in the LENGTH, not the penalty
    /// normalization.
    #[test]
    pub(crate) fn profiled_reml_is_invariant_to_penalty_frobenius_scale() {
        let (data, centers) = oracle_disk_design_centers();
        let ell_ref = realized_constant_curvature_length_scale(centers.view(), 0.0).unwrap();
        // A reproducible response with curvature-shaped signal at κ = −1.
        let y = oracle_response(data.view(), centers.view(), ell_ref, -1.0, 7);
        for &kappa in &[-1.5_f64, -0.5, 0.0, 0.8, 1.5] {
            let (b, s) =
                oracle_design_and_penalty(data.view(), centers.view(), ell_ref, kappa, false);
            let v0 = profiled_gaussian_reml_deviance(&b, &y, &s);
            for &alpha in &[1e-3_f64, 37.0, 1e4] {
                let s_scaled = s.mapv(|v| v * alpha);
                let va = profiled_gaussian_reml_deviance(&b, &y, &s_scaled);
                assert!(
                    (v0 - va).abs() <= 1e-7 * (1.0 + v0.abs()),
                    "profiled REML must be invariant to penalty scale α={alpha} at κ={kappa}: \
                     V(S)={v0} vs V(αS)={va} — the Frobenius normalization is NOT gauge, \
                     so confound #2 (−r·log‖S_raw‖_F) WOULD be real"
                );
            }
        }
    }

    /// Claim 2: with the L(κ) data→center effective length, the profiled REML
    /// criterion identifies the SIGN of the true curvature — argmin lands on the
    /// correct side of 0 for BOTH a hyperbolic (κ⋆<0) and a spherical (κ⋆>0)
    /// truth. The same grid with a κ-FROZEN length rails to the +bound for both
    /// (the #944/#1059 unidentifiability), which the oracle also asserts so the
    /// witness FAILS on the pre-fix code path.
    #[test]
    pub(crate) fn profiled_reml_identifies_curvature_sign_with_effective_length() {
        let (data, centers) = oracle_disk_design_centers();
        let ell_ref = realized_constant_curvature_length_scale(centers.view(), 0.0).unwrap();
        let grid: Vec<f64> = (-30..=30).map(|i| f64::from(i) * 0.1).collect();

        let argmin_sign = |kappa_true: f64, frozen: bool| -> (f64, f64) {
            let y = oracle_response(data.view(), centers.view(), ell_ref, kappa_true, 11);
            let mut best_k = f64::NAN;
            let mut best_v = f64::INFINITY;
            for &kappa in &grid {
                let (b, s) =
                    oracle_design_and_penalty(data.view(), centers.view(), ell_ref, kappa, frozen);
                let v = profiled_gaussian_reml_deviance(&b, &y, &s);
                if v < best_v {
                    best_v = v;
                    best_k = kappa;
                }
            }
            (best_k, best_v)
        };

        // --- Hyperbolic truth κ⋆ = −2: L(κ) criterion must pick κ̂ < 0. ---
        let (k_hyp, _) = argmin_sign(-2.0, false);
        eprintln!("[κ-ident] L(κ): hyperbolic truth κ⋆=−2  → κ̂={k_hyp:.2}");
        assert!(
            k_hyp < 0.0,
            "L(κ) profiled REML must identify NEGATIVE curvature for hyperbolic truth; got κ̂={k_hyp}"
        );

        // --- Spherical truth κ⋆ = +2: L(κ) criterion must pick κ̂ > 0. ---
        let (k_sph, _) = argmin_sign(2.0, false);
        eprintln!("[κ-ident] L(κ): spherical truth κ⋆=+2  → κ̂={k_sph:.2}");
        assert!(
            k_sph > 0.0,
            "L(κ) profiled REML must identify POSITIVE curvature for spherical truth; got κ̂={k_sph}"
        );

        // --- WITNESS that the FROZEN length is broken: it rails the hyperbolic
        // truth to the +bound (κ̂ at the top of the grid), i.e. wrong sign. This
        // line FAILS on the pre-L(κ) code, documenting the bug the fix cures. ---
        let (k_frozen_hyp, _) = argmin_sign(-2.0, true);
        eprintln!("[κ-ident] frozen ℓ: hyperbolic truth κ⋆=−2 → κ̂={k_frozen_hyp:.2} (railing bug)");
        assert!(
            k_frozen_hyp > grid[grid.len() - 2],
            "frozen-ℓ criterion is expected to RAIL hyperbolic truth to the +bound (the bug \
             L(κ) fixes); if it no longer rails, the frozen-vs-scaled contrast is stale: κ̂={k_frozen_hyp}"
        );
    }

    /// The fill-invariant effective-length κ-jet `(L, L′, L″)` must be EXACT:
    /// `L` solves the fill target `g(L,κ)=fill⋆` (verify the fill is held
    /// κ-invariant), and `L′`, `L″` match central finite differences of the
    /// implicit solution `L(κ)` itself (re-solving the Newton root at κ±h). This
    /// is the gate the ψ-channel outer gradient depends on — `L′`,`L″` feed the
    /// kernel quotient jets in `constant_curvature_kernel_kappa_jets_scaled`.
    #[test]
    pub(crate) fn effective_length_jet_matches_fd_of_implicit_solution() {
        let (data, centers) = oracle_disk_design_centers();
        let ell_ref = realized_constant_curvature_length_scale(centers.view(), 0.0).unwrap();
        // Reference fill at κ = 0 (the target L(κ) is pinned to).
        let fill_star = data_center_reference_fill(data.view(), centers.view(), ell_ref).unwrap();
        // Solve-only helper: the converged Newton root L(κ) for FD of the jet.
        let solve_l = |kappa: f64| -> f64 {
            constant_curvature_effective_length_jet(data.view(), centers.view(), ell_ref, kappa)
                .unwrap()
                .0
        };
        let h = 1e-5_f64;
        for &kappa in &[-1.5_f64, -0.5, -1e-7, 0.0, 1e-7, 0.8, 1.7] {
            let (l, l1, l2) = constant_curvature_effective_length_jet(
                data.view(),
                centers.view(),
                ell_ref,
                kappa,
            )
            .unwrap();
            // L solves the fill target: g(L, κ) = fill⋆.
            let (g, ..) = data_center_fill_partials(data.view(), centers.view(), kappa, l).unwrap();
            assert!(
                (g - fill_star).abs() <= 1e-10 * (1.0 + fill_star.abs()),
                "κ={kappa}: fill not held invariant: g(L,κ)={g} vs fill⋆={fill_star}"
            );
            // κ = 0 ⇒ L = ℓ_ref exactly (the reference point).
            if kappa == 0.0 {
                assert!(
                    (l - ell_ref).abs() <= 1e-10 * ell_ref,
                    "L(0) must equal ℓ_ref; got {l} vs {ell_ref}"
                );
            }
            // L′, L″ vs central FD of the re-solved implicit root.
            let lp = solve_l(kappa + h);
            let lm = solve_l(kappa - h);
            let fd1 = (lp - lm) / (2.0 * h);
            let fd2 = (lp - 2.0 * l + lm) / (h * h);
            assert!(
                (l1 - fd1).abs() <= 1e-5 * (1.0 + fd1.abs()),
                "κ={kappa}: L′ analytic {l1} vs FD {fd1}"
            );
            assert!(
                (l2 - fd2).abs() <= 1e-3 * (1.0 + fd2.abs()),
                "κ={kappa}: L″ analytic {l2} vs FD {fd2}"
            );
        }
    }

    /// 8 data rows + 8 centers inside a disk of radius < 0.5 (valid in every
    /// κ ∈ [−3, 3] chart). Data ≠ centers so the data→center scale is nontrivial.
    pub(crate) fn oracle_disk_design_centers() -> (Array2<f64>, Array2<f64>) {
        let centers = ndarray::array![
            [0.10, 0.05],
            [-0.20, 0.15],
            [0.30, -0.10],
            [-0.05, -0.25],
            [0.22, 0.20],
            [-0.30, -0.05],
            [0.05, 0.30],
            [-0.15, 0.10],
        ];
        // Deterministic pseudo-random data on a slightly wider disk.
        let mut state = 0x2545_f491_4f6c_dd1d_u64;
        let mut next = || {
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            // map to (−0.42, 0.42)
            ((state >> 11) as f64 / (1u64 << 53) as f64 - 0.5) * 0.84
        };
        let n = 60usize;
        let mut data = Array2::<f64>::zeros((n, 2));
        for i in 0..n {
            data[(i, 0)] = next();
            data[(i, 1)] = next();
        }
        (data, centers)
    }

    /// A curvature-shaped Gaussian response: y = B(κ⋆)·β + ε with β a fixed
    /// pseudo-random vector and ε small, so the SIGNAL geometry is κ⋆.
    pub(crate) fn oracle_response(
        data: ArrayView2<'_, f64>,
        centers: ArrayView2<'_, f64>,
        ell_ref: f64,
        kappa_true: f64,
        seed: u64,
    ) -> Array1<f64> {
        let (b, _s) = oracle_design_and_penalty(data, centers, ell_ref, kappa_true, false);
        let p = b.ncols();
        let mut state = 0x9e37_79b9_7f4a_7c15_u64 ^ seed.wrapping_mul(0x1000_0000_1b3);
        let mut next = || {
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            (state >> 11) as f64 / (1u64 << 53) as f64 - 0.5
        };
        let beta: Array1<f64> = (0..p).map(|_| next() * 2.0).collect();
        let mut y = b.dot(&beta);
        for v in y.iter_mut() {
            *v += next() * 0.05;
        }
        y
    }

    /// #1531 regression: the constant-curvature RKHS primary penalty (the
    /// gauge-restricted kernel Gram `zᵀKz`) is strictly PD / full-rank, so it has
    /// NO null space. This is the fact that makes the `double_penalty` identity
    /// ridge at the top of `build_constant_curvature_basis` correct rather than a
    /// "ridge in the wrong chart": the sibling-basis nullspace-shrinkage path
    /// (`build_nullspace_shrinkage_penalty`) returns `None` on a full-rank primary,
    /// which would turn an explicit `double_penalty = true` into a silent no-op.
    /// If a future basis change gives the primary a genuine null space, this test
    /// fails and the identity-vs-nullspace decision at line ~724 must be revisited.
    #[test]
    fn constant_curvature_gram_is_full_rank_so_identity_is_the_only_double_penalty() {
        // Centers inside every κ chart, several curvatures spanning sign.
        let centers = ndarray::array![
            [0.10, 0.05],
            [-0.20, 0.15],
            [0.30, -0.10],
            [-0.05, -0.25],
            [0.22, 0.20],
            [-0.30, -0.05],
            [0.05, 0.30],
            [-0.15, 0.10],
        ];
        let weights = Array1::<f64>::ones(centers.nrows());
        let z = weighted_coefficient_sum_to_zero_transform(weights.view()).unwrap();
        // Frozen auto length scale (the κ=0 chart-scale rule; 0.0 ⇒ auto), reused
        // across κ so the full-rank check is on the same resolution the basis uses.
        let ell = realized_constant_curvature_length_scale(centers.view(), 0.0).unwrap();

        for &kappa in &[-2.0_f64, -0.5, 0.0, 0.5, 2.0] {
            let k = constant_curvature_kernel_matrix(centers.view(), centers.view(), kappa, ell)
                .unwrap();
            // Primary penalty exactly as the basis builder forms it: symmetrized
            // gauge-restricted kernel Gram.
            let raw = symmetrize(&z.t().dot(&k).dot(&z));

            // (a) The primary is full-rank PD: smallest eigenvalue is strictly
            // positive (well above the spectral tolerance), so there is no null
            // space for a Marra-Wood ridge to shrink.
            let (evals, _v) = FaerEigh::eigh(&raw, faer::Side::Lower).unwrap();
            let max = evals.iter().cloned().fold(0.0_f64, f64::max);
            let min = evals.iter().cloned().fold(f64::INFINITY, f64::min);
            assert!(
                max > 0.0 && min > max * 1e-9,
                "constant-curvature Gram must be full-rank PD at κ={kappa}: \
                 min eig {min:e}, max eig {max:e}"
            );

            // (b) Consequently the sibling nullspace-shrinkage builder yields
            // nothing: matching that pattern would make `double_penalty` a no-op,
            // confirming the identity ridge is the only selectable double penalty.
            let null_shrink =
                crate::basis::bspline_build::build_nullspace_shrinkage_penalty(&raw).unwrap();
            assert!(
                null_shrink.is_none(),
                "build_nullspace_shrinkage_penalty must return None on the full-rank \
                 constant-curvature primary at κ={kappa} (else the double penalty would be \
                 a silent no-op and identity would be wrong)"
            );
        }
    }
}