Skip to main content

gam_terms/basis/
duchon_kernel_math.rs

1use super::*;
2
3pub fn build_duchon_collocation_operator_matrices(
4    centers: ArrayView2<'_, f64>,
5    collocationweights: Option<ArrayView1<'_, f64>>,
6    length_scale: Option<f64>,
7    power: f64,
8    nullspace_order: DuchonNullspaceOrder,
9    aniso_log_scales: Option<&[f64]>,
10    identifiability_transform: Option<ArrayView2<'_, f64>>,
11    max_operator_derivative_order: usize,
12) -> Result<CollocationOperatorMatrices, BasisError> {
13    let mut workspace = BasisWorkspace::default();
14    build_duchon_collocation_operator_matriceswithworkspace(
15        centers,
16        centers,
17        collocationweights,
18        length_scale,
19        power,
20        nullspace_order,
21        aniso_log_scales,
22        identifiability_transform,
23        max_operator_derivative_order,
24        &mut workspace,
25    )
26}
27
28pub fn build_duchon_operator_penalty_matrices(
29    centers: ArrayView2<'_, f64>,
30    collocationweights: Option<ArrayView1<'_, f64>>,
31    length_scale: Option<f64>,
32    power: f64,
33    nullspace_order: DuchonNullspaceOrder,
34    aniso_log_scales: Option<&[f64]>,
35    identifiability_transform: Option<ArrayView2<'_, f64>>,
36) -> Result<DuchonOperatorPenaltyMatrices, BasisError> {
37    let ops = build_duchon_collocation_operator_matrices(
38        centers,
39        collocationweights,
40        length_scale,
41        power,
42        nullspace_order,
43        aniso_log_scales,
44        identifiability_transform,
45        2,
46    )?;
47    let (mass, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d0)));
48    let (tension, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d1)));
49    let (stiffness, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d2)));
50    Ok(DuchonOperatorPenaltyMatrices {
51        mass,
52        tension,
53        stiffness,
54    })
55}
56
57pub fn build_thin_plate_penalty_matrix(
58    centers: ArrayView2<'_, f64>,
59    length_scale: f64,
60) -> Result<ThinPlatePenaltyMatrix, BasisError> {
61    let mut workspace = BasisWorkspace::default();
62    let kernel_transform = thin_plate_kernel_constraint_nullspace(centers, &mut workspace.cache)?;
63    let (penalty, _) =
64        build_thin_plate_penalty_matrices(centers, length_scale, &kernel_transform, false)?;
65    let (penalty, _) = normalize_penalty(&penalty);
66    Ok(ThinPlatePenaltyMatrix { penalty })
67}
68
69pub fn build_duchon_collocation_operator_matriceswithworkspace(
70    centers: ArrayView2<'_, f64>,
71    collocation_points: ArrayView2<'_, f64>,
72    collocationweights: Option<ArrayView1<'_, f64>>,
73    length_scale: Option<f64>,
74    power: f64,
75    nullspace_order: DuchonNullspaceOrder,
76    aniso_log_scales: Option<&[f64]>,
77    identifiability_transform: Option<ArrayView2<'_, f64>>,
78    max_operator_derivative_order: usize,
79    workspace: &mut BasisWorkspace,
80) -> Result<CollocationOperatorMatrices, BasisError> {
81    // The operator design rows are the COLLOCATION points (a density-blind,
82    // space-filling sample of the data support); the columns are the `k` basis
83    // CENTERS. Decoupling them is what makes the operator penalty a faithful
84    // quadrature of `∫‖Dᵠf‖²` (collocating at the `k` centers themselves — the
85    // old `collocation_points == centers` special case — under-samples a
86    // `k`-bump basis and is what made these penalties explode).
87    let nullspace_order = duchon_effective_nullspace_order(centers, nullspace_order);
88    let p_order = duchon_p_from_nullspace_order(nullspace_order);
89    let s_order: f64 = power;
90    let p_colloc = collocation_points.nrows();
91    let n_basis = centers.nrows();
92    let dim = centers.ncols();
93    if collocation_points.ncols() != dim {
94        crate::bail_dim_basis!(
95            "collocation points dim {} != centers dim {dim}",
96            collocation_points.ncols()
97        );
98    }
99    validate_duchon_collocation_orders(
100        length_scale,
101        p_order,
102        s_order,
103        dim,
104        max_operator_derivative_order,
105    )?;
106    if let Some(eta) = aniso_log_scales
107        && eta.len() != dim
108    {
109        crate::bail_dim_basis!(
110            "Duchon anisotropy dimension mismatch: got {}, expected {dim}",
111            eta.len()
112        );
113    }
114    // Partial-fraction expansion only runs in the hybrid Matérn branch
115    // (`length_scale = Some`). The scale-free path (`length_scale = None`)
116    // skips it entirely and is fractional-clean down to the Riesz kernel.
117    let coeffs = length_scale.map(|scale| {
118        let s_int = duchon_power_to_usize(s_order);
119        duchon_partial_fraction_coeffs(p_order, s_int, 1.0 / scale.max(1e-300))
120    });
121    let metric_weights: Option<Vec<f64>> = aniso_log_scales.map(centered_aniso_metric_weights);
122    let row_scales = if let Some(w) = collocationweights {
123        if w.len() != p_colloc {
124            crate::bail_dim_basis!(
125                "collocation weight length mismatch: got {}, expected {p_colloc}",
126                w.len()
127            );
128        }
129        let mut out = Vec::with_capacity(p_colloc);
130        for &wk in w {
131            if !wk.is_finite() || wk < 0.0 {
132                crate::bail_invalid_basis!(
133                    "collocation weights must be finite and non-negative; got {wk}"
134                );
135            }
136            out.push(wk.sqrt());
137        }
138        out
139    } else {
140        vec![1.0; p_colloc]
141    };
142    let z = kernel_constraint_nullspace(centers, nullspace_order, &mut workspace.cache)?;
143    // D0/D1/D2 rows = collocation points (`p_colloc`), columns = basis centers
144    // (`n_basis`). Gradients/Hessians are taken w.r.t. the EVALUATION point
145    // (the collocation row), so `delta = collocation - center`. No symmetry: the
146    // two point sets differ in general.
147    // Skip the costly higher-derivative designs the caller doesn't need: mass
148    // (D0) + tension (D1) build with `max_op = 1`, so the `O(d²)`-row Hessian
149    // (D2) is never allocated or filled — decisive in high `d`.
150    let build_d1 = max_operator_derivative_order >= 1;
151    let build_d2 = max_operator_derivative_order >= 2;
152    let mut d0_raw = Array2::<f64>::zeros((p_colloc, n_basis));
153    let mut d1_raw = Array2::<f64>::zeros((if build_d1 { p_colloc * dim } else { 0 }, n_basis));
154    let mut d2_raw =
155        Array2::<f64>::zeros((if build_d2 { p_colloc * dim * dim } else { 0 }, n_basis));
156    const R_EPS: f64 = 1e-10;
157    for i in 0..p_colloc {
158        let scale_i = row_scales[i];
159        for j in 0..n_basis {
160            let r = if let Some(eta) = aniso_log_scales {
161                let row_i: Vec<f64> = (0..dim).map(|a| collocation_points[[i, a]]).collect();
162                let row_j: Vec<f64> = (0..dim).map(|a| centers[[j, a]]).collect();
163                aniso_distance(&row_i, &row_j, eta)
164            } else {
165                stable_euclidean_norm(
166                    (0..dim).map(|axis| collocation_points[[i, axis]] - centers[[j, axis]]),
167                )
168            };
169            // Floor coincident collocation/center pairs off the kernel's origin
170            // singularity: a farthest-point sample can land exactly on a center.
171            // The gradient/Hessian limits at r→0 are the zeros the `r > R_EPS`
172            // guards below already produce, so flooring only avoids the log-case
173            // `r²·log r` second-derivative blow-up at exact r=0.
174            let r = r.max(R_EPS);
175            let (phi, q, t) = if let (Some(length_scale), Some(coeffs)) =
176                (length_scale, coeffs.as_ref())
177            {
178                let jets =
179                    duchon_radial_jets(r, length_scale, p_order, s_order as usize, dim, coeffs)?;
180                (jets.phi, jets.q, jets.t)
181            } else {
182                let (phi, phi_r, phi_rr) = duchon_kernel_radial_triplet(
183                    r,
184                    length_scale,
185                    p_order,
186                    s_order,
187                    dim,
188                    coeffs.as_ref(),
189                )?;
190                let q = if r > R_EPS { phi_r / r } else { phi_rr };
191                let t = if r > R_EPS {
192                    (phi_rr - q) / (r * r)
193                } else {
194                    0.0
195                };
196                (phi, q, t)
197            };
198            if !phi.is_finite() || !q.is_finite() || !t.is_finite() {
199                crate::bail_invalid_basis!(
200                    "non-finite Duchon collocation operator derivative at (colloc {i}, center {j}), r={r}"
201                );
202            }
203            d0_raw[[i, j]] = scale_i * phi;
204            if build_d2 {
205                for axis_a in 0..dim {
206                    let h_a = collocation_points[[i, axis_a]] - centers[[j, axis_a]];
207                    let w_a = metric_weights
208                        .as_ref()
209                        .map(|weights| weights[axis_a])
210                        .unwrap_or(1.0);
211                    for axis_b in 0..dim {
212                        let h_b = collocation_points[[i, axis_b]] - centers[[j, axis_b]];
213                        let w_b = metric_weights
214                            .as_ref()
215                            .map(|weights| weights[axis_b])
216                            .unwrap_or(1.0);
217                        let diagonal = if axis_a == axis_b { q * w_a } else { 0.0 };
218                        let mixed = if r > R_EPS {
219                            t * w_a * h_a * w_b * h_b
220                        } else {
221                            0.0
222                        };
223                        let value = diagonal + mixed;
224                        let row_i = (i * dim + axis_a) * dim + axis_b;
225                        d2_raw[[row_i, j]] = scale_i * value;
226                    }
227                }
228            }
229            if build_d1 && r > R_EPS {
230                for axis in 0..dim {
231                    let delta = collocation_points[[i, axis]] - centers[[j, axis]];
232                    let axis_scale = metric_weights
233                        .as_ref()
234                        .map(|weights| weights[axis])
235                        .unwrap_or(1.0);
236                    d1_raw[[i * dim + axis, j]] = scale_i * q * axis_scale * delta;
237                }
238            }
239        }
240    }
241    let d0_kernel = fast_ab(&d0_raw, &z);
242    let poly = polynomial_block_from_order(centers, nullspace_order);
243    let kernel_cols = d0_kernel.ncols();
244    let poly_cols = poly.ncols();
245    let total_cols = kernel_cols + poly_cols;
246    // The polynomial block is the unpenalized Duchon null space, left zero before
247    // the outer identifiability transform (these operators feed only penalty
248    // construction). Orders the caller skipped stay empty (0 rows).
249    let mut d0 = Array2::<f64>::zeros((p_colloc, total_cols));
250    d0.slice_mut(s![.., 0..kernel_cols]).assign(&d0_kernel);
251    let mut d1 = Array2::<f64>::zeros((if build_d1 { p_colloc * dim } else { 0 }, total_cols));
252    if build_d1 {
253        d1.slice_mut(s![.., 0..kernel_cols])
254            .assign(&fast_ab(&d1_raw, &z));
255    }
256    let mut d2 =
257        Array2::<f64>::zeros((if build_d2 { p_colloc * dim * dim } else { 0 }, total_cols));
258    if build_d2 {
259        d2.slice_mut(s![.., 0..kernel_cols])
260            .assign(&fast_ab(&d2_raw, &z));
261    }
262    if let Some(z) = identifiability_transform {
263        let z = z.to_owned();
264        d0 = fast_ab(&d0, &z);
265        d1 = fast_ab(&d1, &z);
266        d2 = fast_ab(&d2, &z);
267    }
268    Ok(CollocationOperatorMatrices {
269        d0,
270        d1,
271        d2,
272        collocation_points: collocation_points.to_owned(),
273        kernel_nullspace_transform: Some(z),
274        polynomial_block_cols: poly_cols,
275    })
276}
277
278#[inline(always)]
279pub(crate) fn bessel_k0_stable(x: f64) -> f64 {
280    let x_pos = x.max(1e-300);
281    if x_pos <= 2.0 {
282        return bessel_k0_small_series(x_pos);
283    }
284    let y = 2.0 / x_pos;
285    (-x_pos).exp() / x_pos.sqrt()
286        * (1.253_314_14
287            + y * (-0.078_323_58
288                + y * (0.021_895_68
289                    + y * (-0.010_624_46
290                        + y * (0.005_878_72 + y * (-0.002_515_40 + y * 0.000_532_08))))))
291}
292
293#[inline(always)]
294pub(crate) fn bessel_k1_stable(x: f64) -> f64 {
295    let x_pos = x.max(1e-300);
296    if x_pos <= 2.0 {
297        return bessel_k1_small_series(x_pos);
298    }
299    let y = 2.0 / x_pos;
300    (-x_pos).exp() / x_pos.sqrt()
301        * (1.253_314_14
302            + y * (0.234_986_19
303                + y * (-0.036_556_20
304                    + y * (0.015_042_68
305                        + y * (-0.007_803_53 + y * (0.003_256_14 + y * -0.000_682_45))))))
306}
307
308#[inline(always)]
309pub(crate) fn bessel_k0_k1_small_series(x: f64) -> (f64, f64) {
310    const EULER_GAMMA: f64 = 0.577_215_664_901_532_9;
311    let y = 0.25 * x * x;
312    let log_half_plus_gamma = 0.5 * y.ln() + EULER_GAMMA;
313    let mut i0 = 1.0;
314    let mut i1 = 0.5 * x;
315    let mut harmonic = 0.0;
316    let mut y_power_over_fact_sq = 1.0;
317    let mut k0_series = 0.0;
318    let mut k0_series_y_derivative_times_y = 0.0;
319    for k in 1..=256 {
320        let kf = k as f64;
321        harmonic += 1.0 / kf;
322        y_power_over_fact_sq *= y / (kf * kf);
323        let k0_term = harmonic * y_power_over_fact_sq;
324        k0_series += k0_term;
325        k0_series_y_derivative_times_y += kf * k0_term;
326        i0 += y_power_over_fact_sq;
327        i1 += 0.5 * x * y_power_over_fact_sq / (kf + 1.0);
328        if k0_term.abs() <= f64::EPSILON * i0.abs().max(k0_series.abs()).max(1.0) {
329            break;
330        }
331    }
332
333    let k0 = -log_half_plus_gamma * i0 + k0_series;
334    let k1 = i0 / x + log_half_plus_gamma * i1 - (2.0 / x) * k0_series_y_derivative_times_y;
335    (k0, k1)
336}
337
338#[inline(always)]
339pub(crate) fn bessel_k0_small_series(x: f64) -> f64 {
340    bessel_k0_k1_small_series(x).0
341}
342
343#[inline(always)]
344pub(crate) fn bessel_k1_small_series(x: f64) -> f64 {
345    bessel_k0_k1_small_series(x).1
346}
347
348pub(crate) const DUCHON_DERIVATIVE_R_FLOOR_REL: f64 = 1e-5;
349
350pub(crate) const DUCHON_COLLISION_TAYLOR_REL: f64 = 1e-4;
351
352/// Minimum `(row, center)` pair count before a radial design sweep builds a
353/// certified [`radial_profile::RadialProfile`] instead of evaluating every
354/// pair exactly. The profile build costs a few hundred exact jet
355/// evaluations, so it only pays for itself when the sweep reuses it well
356/// beyond that; below the threshold the exact path keeps small fits
357/// bit-identical to the pre-profile behavior.
358pub(crate) const RADIAL_PROFILE_MIN_PAIRS: usize = 16_384;
359
360#[inline(always)]
361pub(crate) fn duchon_p_from_nullspace_order(order: DuchonNullspaceOrder) -> usize {
362    match order {
363        // Duchon null spaces contain all polynomials of degree < m.
364        // The public `order` knob chooses that polynomial degree cutoff:
365        //   order=0 -> constants only  -> m=1
366        //   order=1 -> constants+linear -> m=2
367        DuchonNullspaceOrder::Zero => 1,
368        DuchonNullspaceOrder::Linear => 2,
369        DuchonNullspaceOrder::Degree(degree) => degree + 1,
370    }
371}
372
373/// Returns the effective Duchon null-space order, auto-degrading when the
374/// requested order leaves no radial kernel degrees of freedom.
375///
376/// The constrained kernel block has `centers.nrows() - rank(P)` columns, where
377/// `P` is the polynomial null-space block. A valid polynomial block with
378/// exactly as many centers as columns is still useless for smoothing: every
379/// center is consumed by the side constraints and the design collapses to the
380/// polynomial tail. Degrade to the highest lower null-space order with at
381/// least one constrained kernel column.
382pub(crate) fn duchon_effective_nullspace_order(
383    centers: ArrayView2<'_, f64>,
384    order: DuchonNullspaceOrder,
385) -> DuchonNullspaceOrder {
386    if order == DuchonNullspaceOrder::Zero {
387        return order;
388    }
389    let mut effective = order;
390    while effective != DuchonNullspaceOrder::Zero
391        && centers.nrows() <= polynomial_block_from_order(centers, effective).ncols()
392    {
393        effective = duchon_previous_nullspace_order(effective);
394    }
395    if effective != order {
396        // Dedup: warn only once per (rows, cols, requested_order) per process.
397        // BFGS × P-IRLS × derivative callsites hit this path many times.
398        static SEEN: std::sync::OnceLock<
399            std::sync::Mutex<std::collections::HashSet<(usize, usize, DuchonNullspaceOrder)>>,
400        > = std::sync::OnceLock::new();
401        let seen = SEEN.get_or_init(|| std::sync::Mutex::new(std::collections::HashSet::new()));
402        let key = (centers.nrows(), centers.ncols(), order);
403        let fresh = seen.lock().map(|mut s| s.insert(key)).unwrap_or(true);
404        if fresh {
405            let requested_cols = polynomial_block_from_order(centers, order).ncols();
406            let effective_cols = polynomial_block_from_order(centers, effective).ncols();
407            log::warn!(
408                "Duchon nullspace order={:?} in dim={} with {} centers leaves no radial kernel columns (polynomial_cols={}); degrading to {:?} (polynomial_cols={})",
409                order,
410                centers.ncols(),
411                centers.nrows(),
412                requested_cols,
413                effective,
414                effective_cols
415            );
416        }
417    }
418    effective
419}
420
421#[inline(always)]
422pub(crate) fn gamma_lanczos(x: f64) -> f64 {
423    // Numerical Recipes / Lanczos approximation with reflection formula.
424    const G: f64 = 7.0;
425    const P: [f64; 9] = [
426        0.999_999_999_999_809_9,
427        676.520_368_121_885_1,
428        -1_259.139_216_722_402_8,
429        771.323_428_777_653_1,
430        -176.615_029_162_140_6,
431        12.507_343_278_686_905,
432        -0.138_571_095_265_720_12,
433        9.984_369_578_019_571e-6,
434        1.505_632_735_149_311_6e-7,
435    ];
436    if x < 0.5 {
437        let pix = std::f64::consts::PI * x;
438        return std::f64::consts::PI / (pix.sin() * gamma_lanczos(1.0 - x));
439    }
440    let z = x - 1.0;
441    let mut a = P[0];
442    for (i, coeff) in P.iter().enumerate().skip(1) {
443        a += coeff / (z + i as f64);
444    }
445    let t = z + G + 0.5;
446    (2.0 * std::f64::consts::PI).sqrt() * t.powf(z + 0.5) * (-t).exp() * a
447}
448
449#[inline(always)]
450pub(crate) fn bessel_k_integer_order(n: usize, z: f64) -> f64 {
451    let zz = z.max(1e-300);
452    if n == 0 {
453        return bessel_k0_stable(zz);
454    }
455    if n == 1 {
456        return bessel_k1_stable(zz);
457    }
458    let mut km1 = bessel_k0_stable(zz);
459    let mut k = bessel_k1_stable(zz);
460    for m in 1..n {
461        let kp1 = km1 + 2.0 * (m as f64) * k / zz;
462        km1 = k;
463        k = kp1;
464    }
465    k
466}
467
468#[inline(always)]
469pub(crate) fn bessel_k_half_integer_order(l: usize, z: f64) -> f64 {
470    // Exact closed-form seeds and the stable upward recurrence
471    //   K_{1/2}(z) = sqrt(π/(2z))·e^{−z},
472    //   K_{3/2}(z) = K_{1/2}(z)·(1 + 1/z),
473    //   K_{ν+1}(z) = K_{ν−1}(z) + (2ν/z)·K_ν(z)   (ν = 1/2 + m, m ≥ 1).
474    // Equivalent to the closed-form polynomial sum, but uses EXACT integer
475    // coefficients via the recurrence instead of approximate Lanczos-gamma
476    // values for `c_j = (l+j)!/(j!(l−j)!)`. The Lanczos approximation is
477    // accurate to ~1 ULP at integer arguments; that error gets amplified
478    // through catastrophic cancellation in derivative lattices of the
479    // r^μ·K_μ(κr) family. Matching the [`BesselKLadder`] arithmetic byte-
480    // for-byte also ensures the ladder/per-call paths agree exactly.
481    let zz = z.max(1e-300);
482    let k_half = (std::f64::consts::PI / (2.0 * zz)).sqrt() * (-zz).exp();
483    if l == 0 {
484        return k_half;
485    }
486    let mut km1 = k_half;
487    let mut k = k_half * (1.0 + 1.0 / zz);
488    for m in 1..l {
489        let nu = m as f64 + 0.5;
490        let kp1 = km1 + 2.0 * nu * k / zz;
491        km1 = k;
492        k = kp1;
493    }
494    k
495}
496
497#[inline(always)]
498pub(crate) fn bessel_k_real_half_integer_or_integer(
499    nu_abs: f64,
500    z: f64,
501) -> Result<f64, BasisError> {
502    let two_nu = (2.0 * nu_abs).round();
503    if (two_nu - 2.0 * nu_abs).abs() > 1e-12 {
504        crate::bail_invalid_basis!(
505            "unsupported Bessel-K order ν={nu_abs}; only integer/half-integer orders are supported"
506        );
507    }
508    let two_nu_i = two_nu as i64;
509    if two_nu_i % 2 == 0 {
510        let n = (two_nu_i / 2).max(0) as usize;
511        Ok(bessel_k_integer_order(n, z))
512    } else {
513        let l = ((two_nu_i - 1) / 2).max(0) as usize;
514        Ok(bessel_k_half_integer_order(l, z))
515    }
516}
517
518/// Precomputed coefficient for `polyharmonic_kernel` that depends only on
519/// `m` and `k_dim`, not on `r`.  Avoids repeated gamma_lanczos calls in the
520/// hot kernel evaluation loop (called n × k times per basis build).
521#[derive(Clone, Copy)]
522pub(crate) struct PolyharmonicBlockCoeff {
523    pub(crate) c: f64,
524    pub(crate) power: f64,
525    pub(crate) is_log_case: bool,
526}
527
528impl PolyharmonicBlockCoeff {
529    pub(crate) fn new(m: f64, k_dim: usize) -> Self {
530        assert!(
531            m.is_finite() && m > 0.0,
532            "PolyharmonicBlockCoeff::new: m must be finite and > 0, got {m}"
533        );
534        let k_half = 0.5 * k_dim as f64;
535        let power = 2.0 * m - k_dim as f64;
536        // Log case: k_dim is even and `2m − k_dim` is a non-negative even
537        // integer (within ε). For fractional `m` this never fires; for
538        // integer `m` it matches the original integer modulo check exactly.
539        const LOG_EPS: f64 = 1e-12;
540        let two_m = 2.0 * m;
541        let is_log_case = k_dim.is_multiple_of(2) && {
542            let n_f = (power / 2.0).round();
543            n_f >= 0.0 && (n_f * 2.0 - power).abs() < LOG_EPS
544        };
545        if is_log_case {
546            let m_int = m.round() as i64;
547            let m_minus_half_d_plus_one = (m - k_half + 1.0).round() as i64;
548            let c = polyharmonic_log_sign(m_int as usize, k_dim)
549                / (2.0_f64.powi((two_m.round() as i32) - 1)
550                    * std::f64::consts::PI.powf(k_half)
551                    * gamma_lanczos(m)
552                    * gamma_lanczos(m_minus_half_d_plus_one as f64));
553            Self {
554                c,
555                power,
556                is_log_case: true,
557            }
558        } else {
559            let c = gamma_lanczos(k_half - m)
560                / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m));
561            Self {
562                c,
563                power,
564                is_log_case: false,
565            }
566        }
567    }
568
569    #[inline(always)]
570    pub(crate) fn eval(&self, r: f64) -> f64 {
571        if r <= 0.0 {
572            return self.origin_limit();
573        }
574        if self.is_log_case {
575            self.c * r.powf(self.power) * r.max(1e-300).ln()
576        } else {
577            self.c * r.powf(self.power)
578        }
579    }
580
581    #[inline(always)]
582    pub(crate) fn origin_limit(&self) -> f64 {
583        if self.is_log_case {
584            log_power_origin_limit(self.c, self.power, 1.0, 0.0)
585        } else {
586            log_power_origin_limit(self.c, self.power, 0.0, 1.0)
587        }
588    }
589}
590
591pub(crate) fn polyharmonic_kernel(r: f64, m: f64, k_dim: usize) -> f64 {
592    PolyharmonicBlockCoeff::new(m, k_dim).eval(r)
593}
594
595#[inline(always)]
596pub(crate) fn signed_infinity(sign: f64) -> f64 {
597    if sign.is_sign_negative() {
598        f64::NEG_INFINITY
599    } else {
600        f64::INFINITY
601    }
602}
603
604#[inline(always)]
605pub(crate) fn log_power_origin_limit(
606    coeff: f64,
607    exponent: f64,
608    log_coeff: f64,
609    pure_coeff: f64,
610) -> f64 {
611    if log_coeff == 0.0 && pure_coeff == 0.0 {
612        return 0.0;
613    }
614    if exponent > 0.0 {
615        return 0.0;
616    }
617    if exponent == 0.0 {
618        if log_coeff != 0.0 {
619            signed_infinity(-coeff * log_coeff)
620        } else {
621            coeff * pure_coeff
622        }
623    } else if log_coeff != 0.0 {
624        signed_infinity(-coeff * log_coeff)
625    } else {
626        signed_infinity(coeff * pure_coeff)
627    }
628}
629
630#[inline(always)]
631pub(crate) fn polyharmonic_log_sign(m: usize, k_dim: usize) -> f64 {
632    assert!(
633        k_dim.is_multiple_of(2),
634        "polyharmonic_log_sign requires even kernel dimension: k_dim={k_dim}, m={m}"
635    );
636    (-1.0_f64).powi(m as i32 - (k_dim as i32 / 2) + 1)
637}
638
639#[inline(always)]
640pub(crate) fn duchon_matern_block(
641    r: f64,
642    kappa: f64,
643    n_order: usize,
644    k_dim: usize,
645) -> Result<f64, BasisError> {
646    let n = n_order as f64;
647    let k_half = 0.5 * k_dim as f64;
648    let nu = n - k_half;
649    let nu_abs = nu.abs();
650    let c = kappa.powf(k_half - n)
651        / ((2.0 * std::f64::consts::PI).powf(k_half) * 2.0_f64.powf(n - 1.0) * gamma_lanczos(n));
652    if r <= 0.0 {
653        if nu > 0.0 {
654            // r^ν K_ν(κr) → 2^(ν−1) Γ(ν) κ^(−ν) as r→0+.
655            return Ok(c * 2.0_f64.powf(nu - 1.0) * gamma_lanczos(nu) * kappa.powf(-nu));
656        }
657        // ν ≤ 0: c·r^ν·K_|ν|(κr) is divergent at r=0 (logarithmically for ν=0,
658        // power-law for ν<0). The hybrid-kernel diagonal must be evaluated via
659        // duchon_hybrid_kernel_collision_value, which sums the divergent
660        // Matérn and polyharmonic blocks so the singularities cancel exactly
661        // (guaranteed by the PFD identity when 2(p+s) > d).
662        crate::bail_invalid_basis!(
663            "Duchon Matérn block at r=0 with ν={nu} ≤ 0 is divergent; \
664             evaluate the hybrid kernel diagonal via the collision routine"
665        );
666    }
667    let z = (kappa * r).max(1e-300);
668    let k_nu = bessel_k_real_half_integer_or_integer(nu_abs, z)?;
669    Ok(c * r.powf(nu) * k_nu)
670}
671
672#[inline(always)]
673pub(crate) fn polyharmonic_kernel_triplet(
674    r: f64,
675    m: f64,
676    k_dim: usize,
677) -> Result<(f64, f64, f64), BasisError> {
678    let (value, first, second, _, _) = polyharmonic_block_jet4(r, m, k_dim)?;
679    Ok((value, first, second))
680}
681
682#[inline(always)]
683pub(crate) fn falling_factorial(alpha: f64, order: usize) -> f64 {
684    (0..order).fold(1.0, |acc, idx| acc * (alpha - idx as f64))
685}
686
687#[inline(always)]
688pub(crate) fn falling_factorial_derivative(alpha: f64, order: usize) -> f64 {
689    if order == 0 {
690        return 0.0;
691    }
692    let mut total = 0.0;
693    for omit in 0..order {
694        let mut term = 1.0;
695        for idx in 0..order {
696            if idx != omit {
697                term *= alpha - idx as f64;
698            }
699        }
700        total += term;
701    }
702    total
703}
704
705/// Unified radial jet for one polyharmonic partial-fraction block.
706///
707/// Returns (φ, φ', φ'', φ''', φ'''') from a single consistent evaluation,
708/// sharing normalization constant, r_safe, and log_r. This eliminates the
709/// possibility of numerical drift between the triplet and higher-order
710/// derivative paths.
711pub(crate) fn polyharmonic_block_jet4(
712    r: f64,
713    m: f64,
714    k_dim: usize,
715) -> Result<(f64, f64, f64, f64, f64), BasisError> {
716    if !r.is_finite() || r < 0.0 {
717        crate::bail_invalid_basis!("polyharmonic distance must be finite and non-negative");
718    }
719    assert!(
720        m.is_finite() && m > 0.0,
721        "polyharmonic_block_jet4: m must be finite and > 0, got {m}"
722    );
723
724    let k_half = 0.5 * k_dim as f64;
725    let alpha = 2.0 * m - k_dim as f64;
726    // Log case: k_dim even and `2m − k_dim` is a non-negative even integer
727    // (within ε). For fractional `m` this never fires.
728    const LOG_EPS: f64 = 1e-12;
729    let is_log_case = k_dim.is_multiple_of(2) && {
730        let n_f = (alpha / 2.0).round();
731        n_f >= 0.0 && (n_f * 2.0 - alpha).abs() < LOG_EPS
732    };
733    if is_log_case {
734        let m_int = m.round() as usize;
735        let c = polyharmonic_log_sign(m_int, k_dim)
736            / (2.0_f64.powi((2 * m_int - 1) as i32)
737                * std::f64::consts::PI.powf(k_half)
738                * gamma_lanczos(m)
739                * gamma_lanczos((m_int - k_dim / 2 + 1) as f64));
740        let mut out = [0.0; 5];
741        for d in 0..5 {
742            let e = alpha - d as f64;
743            let ff = falling_factorial(alpha, d);
744            let ff_d = falling_factorial_derivative(alpha, d);
745            out[d] = if r <= 0.0 {
746                log_power_origin_limit(c, e, ff, ff_d)
747            } else {
748                c * r.powf(e) * (ff * r.ln() + ff_d)
749            };
750        }
751        return Ok((out[0], out[1], out[2], out[3], out[4]));
752    }
753
754    let c = gamma_lanczos(k_half - m)
755        / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m));
756    let mut out = [0.0; 5];
757    for d in 0..5 {
758        let e = alpha - d as f64;
759        let ff = falling_factorial(alpha, d);
760        out[d] = if r <= 0.0 {
761            log_power_origin_limit(c, e, 0.0, ff)
762        } else {
763            c * ff * r.powf(e)
764        };
765    }
766    Ok((out[0], out[1], out[2], out[3], out[4]))
767}
768
769#[inline(always)]
770pub(crate) fn log_power_family_derivative(
771    exponent: f64,
772    log_coeff: f64,
773    pure_coeff: f64,
774) -> (f64, f64, f64) {
775    (
776        exponent - 1.0,
777        exponent * log_coeff,
778        exponent * pure_coeff + log_coeff,
779    )
780}
781
782#[inline(always)]
783pub(crate) fn log_power_family_value(
784    r: f64,
785    coeff: f64,
786    exponent: f64,
787    log_coeff: f64,
788    pure_coeff: f64,
789) -> f64 {
790    if r <= 0.0 {
791        log_power_origin_limit(coeff, exponent, log_coeff, pure_coeff)
792    } else {
793        coeff * r.powf(exponent) * (log_coeff * r.ln() + pure_coeff)
794    }
795}
796
797#[inline(always)]
798pub(crate) fn duchon_polyharmonic_operator_block_jets(
799    r: f64,
800    m: f64,
801    k_dim: usize,
802) -> Result<(f64, f64, f64, f64), BasisError> {
803    if !r.is_finite() || r < 0.0 {
804        crate::bail_invalid_basis!("polyharmonic distance must be finite and non-negative");
805    }
806    assert!(
807        m.is_finite() && m > 0.0,
808        "duchon_polyharmonic_operator_block_jets: m must be finite and > 0, got {m}"
809    );
810
811    let k_half = 0.5 * k_dim as f64;
812    let alpha = 2.0 * m - k_dim as f64;
813    // Log case: k_dim even and `2m − k_dim` is a non-negative even integer
814    // (within ε). For fractional `m` this never fires; for integer `m` it
815    // matches the original `k_dim % 2 == 0 && m >= k_dim / 2` check.
816    const LOG_EPS: f64 = 1e-12;
817    let is_log_case = k_dim.is_multiple_of(2) && {
818        let n_f = (alpha / 2.0).round();
819        n_f >= 0.0 && (n_f * 2.0 - alpha).abs() < LOG_EPS
820    };
821    let (c, phi_log_coeff, phi_pure_coeff) = if is_log_case {
822        let m_int = m.round() as usize;
823        (
824            polyharmonic_log_sign(m_int, k_dim)
825                / (2.0_f64.powi((2 * m_int - 1) as i32)
826                    * std::f64::consts::PI.powf(k_half)
827                    * gamma_lanczos(m)
828                    * gamma_lanczos((m_int - k_dim / 2 + 1) as f64)),
829            1.0,
830            0.0,
831        )
832    } else {
833        (
834            gamma_lanczos(k_half - m)
835                / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m)),
836            0.0,
837            1.0,
838        )
839    };
840
841    let (phi_r_exp, phi_r_log, phi_r_pure) =
842        log_power_family_derivative(alpha, phi_log_coeff, phi_pure_coeff);
843    let q_exp = phi_r_exp - 1.0;
844    let q = log_power_family_value(r, c, q_exp, phi_r_log, phi_r_pure);
845
846    let (q_r_exp_raw, q_r_log, q_r_pure) =
847        log_power_family_derivative(q_exp, phi_r_log, phi_r_pure);
848    let t_exp = q_r_exp_raw - 1.0;
849    let t = log_power_family_value(r, c, t_exp, q_r_log, q_r_pure);
850
851    let (t_r_exp, t_r_log, t_r_pure) = log_power_family_derivative(t_exp, q_r_log, q_r_pure);
852    let t_r = log_power_family_value(r, c, t_r_exp, t_r_log, t_r_pure);
853
854    let (t_rr_exp, t_rr_log, t_rr_pure) = log_power_family_derivative(t_r_exp, t_r_log, t_r_pure);
855    let t_rr = log_power_family_value(r, c, t_rr_exp, t_rr_log, t_rr_pure);
856
857    Ok((q, t, t_r, t_rr))
858}
859
860/// Shared Bessel-K ladder for one evaluation point `z = κ·r`.
861///
862/// Every Matérn partial-fraction block and every term of its radial
863/// derivative lattice consumes `K_ν(z)` at orders from ONE parity class
864/// (integer when the covariate dimension is even, half-integer when odd),
865/// differing by integers — and all at the SAME `z`. The previous code
866/// restarted the `K₀/K₁` (or closed-form half-integer) seed evaluation and
867/// the upward recurrence inside every per-term Bessel call: hundreds of
868/// redundant seed+recurrence runs per `(row, center)` pair, which the #979
869/// CTN stage-1 stack profile showed to be the dominant cost of every Duchon
870/// κ-trial at scale. One ladder per point replaces all of them: two seed
871/// evaluations plus the standard upward recurrence
872/// `K_{ν+1}(z) = K_{ν−1}(z) + (2ν/z)·K_ν(z)`, which is the numerically
873/// STABLE direction for `K` (it grows with ν). For integer orders this is
874/// arithmetic-identical to the old per-call `bessel_k_integer_order`, which
875/// ran the same seeds and recurrence internally; for half-integer orders the
876/// recurrence is exact and replaces the per-order closed-form sum.
877pub(crate) struct BesselKLadder {
878    /// `values[i] = K_{base + i}(z)` with `base ∈ {0, ½}`.
879    pub(crate) values: SmallVec<[f64; 16]>,
880    pub(crate) half_integer: bool,
881}
882
883impl BesselKLadder {
884    pub(crate) fn build(z: f64, half_integer: bool, max_order_steps: usize) -> Self {
885        let zz = z.max(1e-300);
886        let mut values: SmallVec<[f64; 16]> = SmallVec::with_capacity(max_order_steps + 2);
887        if half_integer {
888            // K_{1/2}(z) = √(π/(2z))·e^{−z};  K_{3/2}(z) = K_{1/2}(z)·(1 + 1/z).
889            let k_half = (std::f64::consts::PI / (2.0 * zz)).sqrt() * (-zz).exp();
890            values.push(k_half);
891            values.push(k_half * (1.0 + 1.0 / zz));
892        } else {
893            values.push(bessel_k0_stable(zz));
894            values.push(bessel_k1_stable(zz));
895        }
896        let base = if half_integer { 0.5 } else { 0.0 };
897        for i in 1..max_order_steps {
898            let nu = base + i as f64;
899            let next = values[i - 1] + 2.0 * nu * values[i] / zz;
900            values.push(next);
901        }
902        Self {
903            values,
904            half_integer,
905        }
906    }
907
908    /// `K_{|order|}(z)` from the ladder (`K_{−ν} = K_ν`).
909    #[inline]
910    pub(crate) fn k_abs(&self, order_abs: f64) -> f64 {
911        let base = if self.half_integer { 0.5 } else { 0.0 };
912        let idx = (order_abs - base).round() as usize;
913        self.values[idx]
914    }
915}
916
917/// Radial-derivative jets of the Matérn family `coeff·r^μ·K_μ(κr)` up to
918/// order `max_j ≤ 4`, evaluated against a shared [`BesselKLadder`].
919///
920/// Exact recurrence derived from `d/dr[r^ν K_ν(κr)]` and the Bessel identity
921/// `dK_ν/dz = −K_{ν−1}(z) − (ν/z)K_ν(z)`:
922///
923///   g⁽⁰⁾ = c · r^ν · K_ν(z)
924///   g⁽¹⁾ = −c · κ · r^ν · K_{ν−1}(z)
925///   g⁽²⁾ = c·κ² r^ν K_{ν−2} − c·κ r^{ν−1} K_{ν−1}, ...
926///
927/// Same derivative lattice as the per-order reference implementation
928/// `duchon_matern_family_radial_derivative_reference` (kept in the test
929/// module as the equivalence oracle)
930/// (term-for-term, in the same order), but: (a) the lattice is expanded
931/// incrementally once instead of rebuilt from scratch per derivative order,
932/// (b) terms live in a fixed-capacity stack buffer instead of per-call heap
933/// `Vec`s (≤ 2^max_j ≤ 16 terms), and (c) every Bessel factor is an indexed
934/// ladder read instead of a fresh seed+recurrence evaluation. Only orders
935/// `0..=max_j` are computed — the q-family consumes order 0 only and the
936/// t-family orders ≤ 2, where the old path always expanded to order 4 and
937/// discarded the tail.
938pub(crate) fn duchon_matern_family_jets_with_ladder(
939    r: f64,
940    kappa: f64,
941    coeff: f64,
942    mu: f64,
943    max_j: usize,
944    ladder: &BesselKLadder,
945    out: &mut [f64],
946) -> Result<(), BasisError> {
947    if max_j > 4 || out.len() <= max_j {
948        crate::bail_invalid_basis!(
949            "Duchon Matérn-family ladder jets support derivative orders 0..=4 with an output slot per order"
950        );
951    }
952    if r <= 0.0 {
953        out[..=max_j].fill(0.0);
954        if mu > 0.0 {
955            out[0] = coeff * 2.0_f64.powf(mu - 1.0) * gamma_lanczos(mu) * kappa.powf(-mu);
956        }
957        return Ok(());
958    }
959    let mut terms: SmallVec<[DuchonMaternDerivativeTerm; 16]> =
960        smallvec![DuchonMaternDerivativeTerm {
961            coeff,
962            kappa_power: 0,
963            r_power: mu,
964            bessel_order: mu,
965        }];
966    for (j, slot) in out.iter_mut().enumerate().take(max_j + 1) {
967        if j > 0 {
968            let mut next: SmallVec<[DuchonMaternDerivativeTerm; 16]> =
969                SmallVec::with_capacity(terms.len() * 2);
970            for term in &terms {
971                let stay_coeff = term.coeff * (term.r_power - term.bessel_order);
972                if stay_coeff != 0.0 {
973                    next.push(DuchonMaternDerivativeTerm {
974                        coeff: stay_coeff,
975                        kappa_power: term.kappa_power,
976                        r_power: term.r_power - 1.0,
977                        bessel_order: term.bessel_order,
978                    });
979                }
980                next.push(DuchonMaternDerivativeTerm {
981                    coeff: -term.coeff,
982                    kappa_power: term.kappa_power + 1,
983                    r_power: term.r_power,
984                    bessel_order: term.bessel_order - 1.0,
985                });
986            }
987            terms = next;
988        }
989        let mut value = KahanSum::default();
990        for term in &terms {
991            if term.coeff == 0.0 {
992                continue;
993            }
994            value.add(
995                term.coeff
996                    * kappa.powi(term.kappa_power as i32)
997                    * r.powf(term.r_power)
998                    * ladder.k_abs(term.bessel_order.abs()),
999            );
1000        }
1001        *slot = value.sum();
1002    }
1003    Ok(())
1004}
1005
1006/// Maximum ladder steps (`K_base ..= K_{base+steps}`) needed by the q/t
1007/// operator families of Matérn block `n` in dimension `k_dim`: the q-family
1008/// reads `K_{|ν−1|}` and the t-family `K_{|ν−2−j|}` for `j ≤ 2`, ν = n − d/2.
1009pub(crate) fn duchon_matern_block_max_ladder_steps(n_order: usize, k_dim: usize) -> usize {
1010    let nu = n_order as f64 - 0.5 * k_dim as f64;
1011    let candidates = [
1012        (nu - 1.0).abs(),
1013        (nu - 2.0).abs(),
1014        (nu - 3.0).abs(),
1015        (nu - 4.0).abs(),
1016    ];
1017    let max_abs = candidates.iter().copied().fold(0.0_f64, f64::max);
1018    max_abs.floor() as usize + 1
1019}
1020
1021pub(crate) fn duchon_matern_operator_block_jets_with_ladder(
1022    r: f64,
1023    kappa: f64,
1024    n_order: usize,
1025    k_dim: usize,
1026    ladder: &BesselKLadder,
1027) -> Result<(f64, f64, f64, f64), BasisError> {
1028    if r <= 0.0 {
1029        return Ok((0.0, 0.0, 0.0, 0.0));
1030    }
1031    let n = n_order as f64;
1032    let k_half = 0.5 * k_dim as f64;
1033    let nu = n - k_half;
1034    let c = kappa.powf(k_half - n)
1035        / ((2.0 * std::f64::consts::PI).powf(k_half) * 2.0_f64.powf(n - 1.0) * gamma_lanczos(n));
1036
1037    let mut q_out = [0.0_f64; 1];
1038    duchon_matern_family_jets_with_ladder(r, kappa, -c * kappa, nu - 1.0, 0, ladder, &mut q_out)?;
1039    let mut t_out = [0.0_f64; 3];
1040    duchon_matern_family_jets_with_ladder(
1041        r,
1042        kappa,
1043        c * kappa * kappa,
1044        nu - 2.0,
1045        2,
1046        ladder,
1047        &mut t_out,
1048    )?;
1049    Ok((q_out[0], t_out[0], t_out[1], t_out[2]))
1050}
1051
1052#[inline(always)]
1053pub(crate) fn pure_duchon_block_order(p_order: usize, s_order: f64) -> f64 {
1054    p_order as f64 + s_order
1055}
1056
1057pub(crate) fn validate_duchon_kernel_orders(
1058    length_scale: Option<f64>,
1059    p_order: usize,
1060    s_order: f64,
1061    k_dim: usize,
1062) -> Result<(), BasisError> {
1063    if k_dim == 0 {
1064        crate::bail_invalid_basis!("Duchon basis requires at least one covariate dimension");
1065    }
1066    if let Some(scale) = length_scale
1067        && (!scale.is_finite() || scale <= 0.0)
1068    {
1069        crate::bail_invalid_basis!("Duchon hybrid length_scale must be finite and positive");
1070    }
1071    // Two independent well-posedness conditions on (p, s, d) for pure Duchon.
1072    //
1073    // (1) CPD-vs-nullspace adequacy — gated below on `length_scale.is_none()`.
1074    //     The pure-polyharmonic kernel of effective order m = p+s in R^d is
1075    //     phi(r) = r^{2m-d}, or r^{2m-d}·log r when 2m-d is a non-negative
1076    //     even integer (the "log case", reached precisely when d is even
1077    //     and m >= d/2). Wendland's Theorem 8.17 / 8.18 give its
1078    //     conditional-positive-definiteness order:
1079    //
1080    //         d odd,  exponent half-integer:  ceil((2m-d)/2) = m - (d-1)/2
1081    //         d even, log case:               (2m-d)/2 + 1   = m - d/2 + 1
1082    //
1083    //     Duchon interpolation with polynomial nullspace P_p (polynomials
1084    //     of degree < p) is uniquely solvable iff the kernel's CPD order
1085    //     does not exceed p. Substituting m = p + s:
1086    //
1087    //         d odd:  s <= (d-1)/2     <=>  2s <= d - 1
1088    //         d even: s <= d/2 - 1     <=>  2s <= d - 2
1089    //
1090    //     Both branches collapse to `2s < d` once we use that s and d are
1091    //     integers and 2s is therefore even (so `2s = d - 1` is impossible
1092    //     for even d, and `2s <= d - 2` is just `2s < d`).
1093    //
1094    //     Counter-example admitted if this guard is dropped: d=2, p=1, s=1
1095    //     passes the spectral check (2(1+1)=4 > 2) and builds the TPS
1096    //     kernel c·r²·log r against a constants-only nullspace P_1; the
1097    //     interpolation form is not PD on lambda perp P_1 and the fitted
1098    //     penalty is meaningless.
1099    //
1100    //     The hybrid (Matérn-blended) Duchon kernel sidesteps this entirely:
1101    //     the Matérn remainder is strictly positive definite (CPD order 0),
1102    //     so any P_p suffices — hence the `length_scale.is_none()` gate.
1103    //
1104    // (2) Spectral kernel-existence — universal, gated below on the sum.
1105    //     The pointwise kernel comes from the inverse Fourier of
1106    //     1/|xi|^{2(p+s)}, which is a finite distribution at the origin
1107    //     iff `2(p+s) > d`. Below that threshold the radial kernel value
1108    //     diverges and there is nothing to evaluate.
1109    if !s_order.is_finite() || s_order < 0.0 {
1110        crate::bail_invalid_basis!("Duchon spectral power must be finite and ≥ 0; got s={s_order}");
1111    }
1112    if length_scale.is_none() && p_order < 2 && 2.0 * s_order >= k_dim as f64 {
1113        crate::bail_invalid_basis!(
1114            "pure Duchon requires power < dimension/2 for nullspace degree < {p_order}; got power={s_order}, dimension={k_dim}"
1115        );
1116    }
1117    let spectral_order = 2.0 * (p_order as f64 + s_order);
1118    if spectral_order <= k_dim as f64 {
1119        crate::bail_invalid_basis!(
1120            "Duchon pointwise kernel values require 2*(p+s) > dimension; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1121        );
1122    }
1123    Ok(())
1124}
1125
1126pub(crate) fn validate_duchon_collocation_orders(
1127    length_scale: Option<f64>,
1128    p_order: usize,
1129    s_order: f64,
1130    k_dim: usize,
1131    max_operator_derivative_order: usize,
1132) -> Result<(), BasisError> {
1133    // Kernel-level conditions (existence + CPD/nullspace adequacy) come first;
1134    // the operator-level conditions below build on a pointwise-valid kernel.
1135    validate_duchon_kernel_orders(length_scale, p_order, s_order, k_dim)?;
1136    // The spectral_order > k_dim + k checks below are C^k-at-origin
1137    // conditions: for the polyharmonic kernel r^{2(p+s)-d} (or the log
1138    // variant) to admit k-th radial derivatives in the distributional sense
1139    // — and therefore for k-th-order derivative *collocation* of the
1140    // kernel against centers to produce a finite operator — we need its
1141    // exponent to clear the next k orders of differentiation at the
1142    // origin. Equivalently: 2(p+s) - d > k.
1143    //
1144    // Note these are independent of the CPD/nullspace check. The penalty
1145    // matrices ultimately built from these collocation operators are of
1146    // the form S = D_k^T D_k and are PSD by construction; the discipline
1147    // here is purely about *existence* of D_k itself.
1148    let spectral_order = 2.0 * (p_order as f64 + s_order);
1149    if max_operator_derivative_order >= 1 && spectral_order <= k_dim as f64 + 1.0 {
1150        crate::bail_invalid_basis!(
1151            "Duchon D1 collocation requires 2*(p+s) > dimension+1; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1152        );
1153    }
1154    if max_operator_derivative_order >= 2 && spectral_order <= k_dim as f64 + 2.0 {
1155        crate::bail_invalid_basis!(
1156            "Duchon D2 collocation requires 2*(p+s) > dimension+2; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1157        );
1158    }
1159    Ok(())
1160}
1161
1162#[derive(Debug, Clone)]
1163pub struct DuchonPartialFractionCoeffs {
1164    pub(crate) a: Vec<f64>,
1165    pub(crate) b: Vec<f64>,
1166}
1167
1168#[inline(always)]
1169pub(crate) fn duchon_partial_fraction_coeffs(
1170    p_order: usize,
1171    s_order: usize,
1172    kappa: f64,
1173) -> DuchonPartialFractionCoeffs {
1174    // 1/(ρ^{2p}(κ²+ρ²)^s) = Σ a_m/ρ^{2m} + Σ b_n/(κ²+ρ²)^n
1175    let mut a = vec![0.0_f64; p_order + 1]; // 1-based m
1176    let mut b = vec![0.0_f64; s_order + 1]; // 1-based n
1177    if s_order == 0 {
1178        if p_order > 0 {
1179            // Pure intrinsic polyharmonic case: no Matérn tail remains, so the
1180            // spectrum is exactly 1 / ρ^(2p).
1181            a[p_order] = 1.0;
1182        }
1183        return DuchonPartialFractionCoeffs { a, b };
1184    }
1185    for m in 1..=p_order {
1186        let sign = if (p_order - m).is_multiple_of(2) {
1187            1.0
1188        } else {
1189            -1.0
1190        };
1191        let expo = -2.0 * (s_order + p_order - m) as f64;
1192        let comb = binomial_f64(s_order + p_order - m - 1, p_order - m);
1193        a[m] = sign * kappa.powf(expo) * comb;
1194    }
1195    for n in 1..=s_order {
1196        let sign = if p_order.is_multiple_of(2) { 1.0 } else { -1.0 };
1197        let expo = -2.0 * (p_order + s_order - n) as f64;
1198        let comb = if p_order == 0 && n == s_order {
1199            // p=0 reduces to the pure Matérn block 1/(κ²+ρ²)^s.
1200            1.0
1201        } else {
1202            let top = p_order + s_order - n - 1;
1203            binomial_f64(top, s_order - n)
1204        };
1205        b[n] = sign * kappa.powf(expo) * comb;
1206    }
1207    DuchonPartialFractionCoeffs { a, b }
1208}
1209
1210/// 64-node Gauss–Legendre rule on `[0, 1]` (nodes already mapped from the
1211/// canonical `[-1, 1]` interval, weights scaled by the `1/2` Jacobian).
1212///
1213/// Used by [`duchon_hybrid_kernel_stable_integral`] to evaluate the hybrid
1214/// Duchon–Matérn kernel without the catastrophically-cancelling
1215/// partial-fraction sum (gam#1424). The integrand is smooth and strictly
1216/// positive on `(0, 1)`, so a fixed high-order rule reproduces the kernel to
1217/// ~1e-15 relative accuracy across all reachable high-dimensional orders.
1218fn gauss_legendre_01_64() -> &'static [(f64, f64)] {
1219    use std::sync::OnceLock;
1220    static NODES: OnceLock<Vec<(f64, f64)>> = OnceLock::new();
1221    NODES.get_or_init(|| {
1222        // Newton iteration on the Legendre polynomial roots (the classic
1223        // `gauleg` recipe). The N-point rule is symmetric about the midpoint, so
1224        // only the lower half of the roots is solved for and the rule is
1225        // mirrored. Computed once; converges to full f64 precision in a handful
1226        // of Newton steps per root.
1227        const N: usize = 64;
1228        let nf = N as f64;
1229        let mut nodes: Vec<(f64, f64)> = Vec::with_capacity(N);
1230        let half = N.div_ceil(2);
1231        for i in 0..half {
1232            // Initial guess for the i-th root on [-1, 1] (Chebyshev-like).
1233            let mut x = (std::f64::consts::PI * (i as f64 + 0.75) / (nf + 0.5)).cos();
1234            let mut dp = 0.0_f64;
1235            for _ in 0..100 {
1236                // Evaluate the Legendre polynomial P_N(x) and derivative P_N'(x)
1237                // via the three-term recurrence.
1238                let mut p0 = 1.0_f64;
1239                let mut p1 = x;
1240                for k in 2..=N {
1241                    let kf = k as f64;
1242                    let p2 = ((2.0 * kf - 1.0) * x * p1 - (kf - 1.0) * p0) / kf;
1243                    p0 = p1;
1244                    p1 = p2;
1245                }
1246                // P_N'(x) = N (x P_N(x) − P_{N−1}(x)) / (x² − 1).
1247                dp = nf * (x * p1 - p0) / (x * x - 1.0);
1248                let dx = p1 / dp;
1249                x -= dx;
1250                if dx.abs() <= 1e-16 * x.abs().max(1.0) {
1251                    break;
1252                }
1253            }
1254            // Gauss–Legendre weight: 2 / ((1 − x²) P_N'(x)²).
1255            let w = 2.0 / ((1.0 - x * x) * dp * dp);
1256            // x is the i-th root counting inward from +1; mirror to −x.
1257            nodes.push((x, w));
1258            if x.abs() > 1e-300 {
1259                nodes.push((-x, w));
1260            }
1261        }
1262        // Sort by node, then map [-1, 1] -> [0, 1] with the 1/2 Jacobian.
1263        nodes.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
1264        nodes
1265            .into_iter()
1266            .map(|(x, w)| (0.5 * (x + 1.0), 0.5 * w))
1267            .collect()
1268    })
1269}
1270
1271/// Evaluate the hybrid Duchon–Matérn kernel
1272/// `φ(r) = F^{-1}[ ρ^{-2p} (κ²+ρ²)^{-s} ](r)` via a single, cancellation-free
1273/// 1-D integral (gam#1424).
1274///
1275/// The partial-fraction expansion `Σ a_m/ρ^{2m} + Σ b_n/(κ²+ρ²)^n` evaluates
1276/// the radial kernel as an alternating sum of individually huge polyharmonic
1277/// (`r^{2m-d}`) and Matérn blocks whose leading singular parts cancel. For
1278/// high `d` (e.g. d=16, p=2, s=7) the largest block is ~1e3 while the true
1279/// value is ~1e-13, so f64 loses *every* significant digit and the resulting
1280/// Gram matrix is no longer PSD (λ_min ≈ −0.26 after normalization).
1281///
1282/// Using the Schwinger / Feynman parametrization of both rational factors and
1283/// performing the Gaussian (radial inverse-FT) integral analytically reduces
1284/// the kernel to
1285///
1286/// ```text
1287///   φ(r) = (4π)^{-d/2} / (Γ(p)Γ(s))
1288///          · ∫₀¹ (1-w)^{p-1} w^{s-1} · 2(B/A)^{b/2} K_b(2√(AB)) dw,
1289///   with  b = p + s − d/2,  A = w κ²,  B = r²/4.
1290/// ```
1291///
1292/// The integrand is smooth and strictly positive on `(0, 1)` (no cancellation),
1293/// so a fixed 64-point Gauss–Legendre rule is accurate to ~1e-15 relative.
1294/// The `r = 0` diagonal has the closed form
1295/// `φ(0) = (4π)^{-d/2} Γ(b)/(Γ(p)Γ(s)) κ^{-2b} B(s−b, p)`.
1296///
1297/// Requires `b = p + s − d/2 > 0` (kernel existence, `2(p+s) > d`) and
1298/// `s − b = d/2 − p > 0` (integrable `w → 0` endpoint), i.e. `2p < d`. Callers
1299/// must check [`duchon_hybrid_stable_integral_applies`] before invoking.
1300pub(crate) fn duchon_hybrid_kernel_stable_integral(
1301    r: f64,
1302    kappa: f64,
1303    p_order: usize,
1304    s_order: usize,
1305    k_dim: usize,
1306) -> Result<f64, BasisError> {
1307    assert!(
1308        duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim),
1309        "duchon_hybrid_kernel_stable_integral precondition violated: 2(p+s) > d and 2p < d required (p={p_order}, s={s_order}, d={k_dim})"
1310    );
1311    let p = p_order as f64;
1312    let s = s_order as f64;
1313    let half_d = 0.5 * k_dim as f64;
1314    let b = p + s - half_d;
1315    let pref = (4.0 * std::f64::consts::PI).powf(-half_d) / (gamma_lanczos(p) * gamma_lanczos(s));
1316    if r == 0.0 {
1317        // φ(0) = pref · Γ(b) · κ^{-2b} · B(s−b, p),  B(x,y)=Γ(x)Γ(y)/Γ(x+y).
1318        let beta = gamma_lanczos(s - b) * gamma_lanczos(p) / gamma_lanczos(s - b + p);
1319        let value = pref * gamma_lanczos(b) * kappa.powf(-2.0 * b) * beta;
1320        if !value.is_finite() {
1321            crate::bail_invalid_basis!(
1322                "non-finite Duchon hybrid diagonal (stable form) for p={p_order}, s={s_order}, d={k_dim}"
1323            );
1324        }
1325        return Ok(value);
1326    }
1327    let mut acc = KahanSum::default();
1328    for &(w, weight) in gauss_legendre_01_64() {
1329        // Smooth term  2(B/A)^{b/2} K_b(2√(AB)) = 2 (r/(2κ√w))^b K_b(κ r √w).
1330        let sqrt_w = w.sqrt();
1331        let z = (kappa * r * sqrt_w).max(1e-300);
1332        let k_b = bessel_k_real_half_integer_or_integer(b.abs(), z)?;
1333        let smooth = 2.0 * (r / (2.0 * kappa * sqrt_w)).powf(b) * k_b;
1334        let factor = (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * smooth;
1335        acc.add(weight * factor);
1336    }
1337    let value = pref * acc.sum();
1338    if !value.is_finite() {
1339        crate::bail_invalid_basis!(
1340            "non-finite Duchon hybrid value (stable form) at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1341        );
1342    }
1343    Ok(value)
1344}
1345
1346/// Radial operator scalars `(q, t, t_r, t_rr)` of the hybrid Duchon–Matérn
1347/// kernel via the same cancellation-free single integral as
1348/// [`duchon_hybrid_kernel_stable_integral`], differentiated under the integral
1349/// sign (gam#1424 / gam#1453).
1350///
1351/// The partial-fraction operator core (`duchon_regularized_operator_core`)
1352/// assembles `q, t` as a sign-alternating sum of polyharmonic and Matérn
1353/// *operator* blocks. In high dimensions (e.g. d=16, p=1, s=9) each block is
1354/// ~1e3 while the true operator scalar is ~1e-13, so f64 loses every
1355/// significant digit — Kahan summation fixes accumulation, not the
1356/// cancellation between huge opposing terms, leaving `q, t` with ~1e-2 relative
1357/// noise. That floor sits above the Chebyshev profile certificate, so the
1358/// production profile cannot certify (gam#1453).
1359///
1360/// This routine instead differentiates the smooth per-`w` integrand
1361/// `g(r,w) = 2 (r/(2c))^b K_b(c r)`, `c = κ√w`, in `r`. Each `w`-slice is a
1362/// single well-conditioned `r^a K_ν(c r)` term whose `r`-derivatives are exact
1363/// (`d/dr[r^a K_ν(c r)] = a r^{a-1} K_ν(c r) − (c/2) r^a (K_{ν-1}+K_{ν+1})`),
1364/// so there is no cross-block cancellation. The radial derivatives `φ′…φ⁗`
1365/// are integrated against the same `(1-w)^{p-1} w^{s-1}` weight and the
1366/// 64-node Gauss–Legendre rule, then the operator scalars are assembled from
1367/// the standard radial relations
1368/// `q = φ′/r`, `t = q′/r`, `t_r = (q″−t)/r`, `t_rr = q‴/r − 2q″/r² + 2q′/r³`.
1369///
1370/// Requires the same precondition as the kernel form
1371/// ([`duchon_hybrid_stable_integral_applies`]) and `r > 0`.
1372pub(crate) fn duchon_hybrid_operator_stable_integral(
1373    r: f64,
1374    kappa: f64,
1375    p_order: usize,
1376    s_order: usize,
1377    k_dim: usize,
1378) -> Result<DuchonRegularizedOperatorCore, BasisError> {
1379    assert!(
1380        duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim),
1381        "duchon_hybrid_operator_stable_integral precondition violated: 2(p+s) > d and 2p < d required (p={p_order}, s={s_order}, d={k_dim})"
1382    );
1383    assert!(
1384        r > 0.0 && r.is_finite(),
1385        "duchon_hybrid_operator_stable_integral requires r > 0, got r={r}"
1386    );
1387    let p = p_order as f64;
1388    let s = s_order as f64;
1389    let half_d = 0.5 * k_dim as f64;
1390    let b = p + s - half_d;
1391    let pref = (4.0 * std::f64::consts::PI).powf(-half_d) / (gamma_lanczos(p) * gamma_lanczos(s));
1392
1393    // Accumulate φ′, φ″, φ‴, φ⁗ across the Gauss–Legendre nodes. (φ itself is
1394    // not needed for the operator scalars.)
1395    let mut d1 = KahanSum::default();
1396    let mut d2 = KahanSum::default();
1397    let mut d3 = KahanSum::default();
1398    let mut d4 = KahanSum::default();
1399
1400    for &(w, weight) in gauss_legendre_01_64() {
1401        let sqrt_w = w.sqrt();
1402        let c = (kappa * sqrt_w).max(1e-300);
1403        let z = (c * r).max(1e-300);
1404
1405        // Smooth integrand g(r) = A · r^b · K_b(c r),  A = 2 (2c)^{-b}.
1406        // Differentiate the symbolic term list (coef, a, ν-offset) in r:
1407        //   d/dr[c0 r^a K_{b+j}(c r)]
1408        //     = c0·a · r^{a-1} K_{b+j}(c r)
1409        //       − c0·(c/2) · r^a (K_{b+j-1}(c r) + K_{b+j+1}(c r)).
1410        // Four derivatives need ν-offsets in [-4, 4] around b.
1411        let a0 = 2.0 * (2.0 * c).powf(-b);
1412        let mut terms: Vec<(f64, f64, i32)> = vec![(a0, b, 0)];
1413        // Cache K_{b+j}(z) for j ∈ [-4, 4] (K is even in order → use |·|).
1414        let bessel = |j: i32| -> Result<f64, BasisError> {
1415            bessel_k_real_half_integer_or_integer((b + j as f64).abs(), z)
1416        };
1417        let evaluate = |terms: &Vec<(f64, f64, i32)>| -> Result<f64, BasisError> {
1418            let mut acc = KahanSum::default();
1419            for &(c0, a, j) in terms {
1420                if c0 == 0.0 {
1421                    continue;
1422                }
1423                acc.add(c0 * r.powf(a) * bessel(j)?);
1424            }
1425            Ok(acc.sum())
1426        };
1427
1428        let mut slice_derivs = [0.0_f64; 4];
1429        for slot in slice_derivs.iter_mut() {
1430            // Differentiate the current term list once.
1431            let mut next: Vec<(f64, f64, i32)> = Vec::with_capacity(terms.len() * 3);
1432            for &(c0, a, j) in &terms {
1433                if c0 == 0.0 {
1434                    continue;
1435                }
1436                if a != 0.0 {
1437                    next.push((c0 * a, a - 1.0, j));
1438                }
1439                let half = -c0 * c * 0.5;
1440                next.push((half, a, j - 1));
1441                next.push((half, a, j + 1));
1442            }
1443            terms = next;
1444            *slot = evaluate(&terms)?;
1445        }
1446
1447        d1.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[0]);
1448        d2.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[1]);
1449        d3.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[2]);
1450        d4.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[3]);
1451    }
1452
1453    let phi1 = pref * d1.sum();
1454    let phi2 = pref * d2.sum();
1455    let phi3 = pref * d3.sum();
1456    let phi4 = pref * d4.sum();
1457    if !(phi1.is_finite() && phi2.is_finite() && phi3.is_finite() && phi4.is_finite()) {
1458        crate::bail_invalid_basis!(
1459            "non-finite Duchon hybrid operator (stable form) at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1460        );
1461    }
1462
1463    // Assemble the operator scalars from the radial derivatives. For r > 0
1464    // these divisions are removable-singularity quotients of moderate
1465    // quantities (no cancellation between blocks remains).
1466    let inv_r = 1.0 / r;
1467    let q = phi1 * inv_r;
1468    // q′ = φ″/r − φ′/r²; q″ = φ‴/r − 2φ″/r² + 2φ′/r³;
1469    // q‴ = φ⁗/r − 3φ‴/r² + 6φ″/r³ − 6φ′/r⁴.
1470    let q_r = phi2 * inv_r - phi1 * inv_r * inv_r;
1471    let q_rr = phi3 * inv_r - 2.0 * phi2 * inv_r * inv_r + 2.0 * phi1 * inv_r * inv_r * inv_r;
1472    let q_rrr = phi4 * inv_r - 3.0 * phi3 * inv_r * inv_r + 6.0 * phi2 * inv_r * inv_r * inv_r
1473        - 6.0 * phi1 * inv_r * inv_r * inv_r * inv_r;
1474    let t = q_r * inv_r;
1475    let t_r = q_rr * inv_r - q_r * inv_r * inv_r;
1476    let t_rr = q_rrr * inv_r - 2.0 * q_rr * inv_r * inv_r + 2.0 * q_r * inv_r * inv_r * inv_r;
1477
1478    Ok(DuchonRegularizedOperatorCore { q, t, t_r, t_rr })
1479}
1480
1481/// Whether the cancellation-free [`duchon_hybrid_kernel_stable_integral`] is
1482/// applicable for these orders: a genuine Matérn blend (`s ≥ 1`) whose
1483/// single-integral reduction has an integrable `w → 0` endpoint (`2p < d`).
1484///
1485/// The complementary cases — `s = 0` (pure polyharmonic, already evaluated
1486/// directly with no cancellation) and `2p ≥ d` (only reachable at low `d`,
1487/// where the partial-fraction sum has no meaningful cancellation) — retain the
1488/// existing partial-fraction path.
1489#[inline]
1490pub(crate) fn duchon_hybrid_stable_integral_applies(
1491    p_order: usize,
1492    s_order: usize,
1493    k_dim: usize,
1494) -> bool {
1495    s_order >= 1 && 2 * p_order < k_dim
1496}
1497
1498pub(crate) fn duchon_matern_kernel_general_from_distance(
1499    r: f64,
1500    length_scale: Option<f64>,
1501    p_order: usize,
1502    s_order: usize,
1503    k_dim: usize,
1504    coeffs: Option<&DuchonPartialFractionCoeffs>,
1505) -> Result<f64, BasisError> {
1506    if !r.is_finite() || r < 0.0 {
1507        crate::bail_invalid_basis!("Duchon kernel distance must be finite and non-negative");
1508    }
1509    let Some(length_scale) = length_scale else {
1510        return Ok(polyharmonic_kernel(
1511            r,
1512            pure_duchon_block_order(p_order, s_order as f64),
1513            k_dim,
1514        ));
1515    };
1516    if !length_scale.is_finite() || length_scale <= 0.0 {
1517        crate::bail_invalid_basis!("Duchon hybrid length_scale must be finite and positive");
1518    }
1519    let kappa = 1.0 / length_scale;
1520
1521    // gam#1424: for genuine high-dimensional Matérn blends the partial-fraction
1522    // sum below cancels catastrophically (the largest block dwarfs the true
1523    // ~1e-13 kernel value, destroying every significant digit and the PSD
1524    // property of the Gram matrix). Evaluate those orders with the
1525    // cancellation-free single-integral form instead — it also handles the
1526    // `r = 0` diagonal in closed form, so it short-circuits before the
1527    // near-collision Taylor branch.
1528    if duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim) {
1529        return duchon_hybrid_kernel_stable_integral(r, kappa, p_order, s_order, k_dim);
1530    }
1531
1532    let coeffs_local;
1533    let coeffs_ref = if let Some(c) = coeffs {
1534        c
1535    } else {
1536        coeffs_local = duchon_partial_fraction_coeffs(p_order, s_order, kappa);
1537        &coeffs_local
1538    };
1539    let collision_taylor_radius = DUCHON_COLLISION_TAYLOR_REL * length_scale.max(1e-8);
1540    // The near-collision Taylor expansion uses phi(0) plus even-order
1541    // derivative collision limits. Those limits only exist when the kernel
1542    // is finite at the origin, i.e. when 2(p+s) > d. Below that threshold
1543    // the partial-fraction blocks individually diverge at r=0 but their
1544    // sum is still a well-defined function for any r > 0 (each Bessel-K
1545    // and r^{2m-d}-type block is finite away from origin). Fall through
1546    // to the direct sum in that regime; r=0 itself remains an error.
1547    let kernel_finite_at_origin = 2 * (p_order + s_order) > k_dim;
1548    if r <= collision_taylor_radius && kernel_finite_at_origin {
1549        return duchon_hybrid_kernel_near_collision_value(
1550            r,
1551            length_scale,
1552            p_order,
1553            s_order,
1554            k_dim,
1555            coeffs_ref,
1556        );
1557    }
1558    let mut val = KahanSum::default();
1559    for (m, coeff) in coeffs_ref.a.iter().enumerate().skip(1) {
1560        if *coeff == 0.0 {
1561            continue;
1562        }
1563        val.add(coeff * polyharmonic_kernel(r, (m) as f64, k_dim));
1564    }
1565    for (n, coeff) in coeffs_ref.b.iter().enumerate().skip(1) {
1566        if *coeff == 0.0 {
1567            continue;
1568        }
1569        val.add(coeff * duchon_matern_block(r, kappa, n, k_dim)?);
1570    }
1571    Ok(val.sum())
1572}
1573
1574pub(crate) fn duchon_hybrid_kernel_collision_value(
1575    length_scale: f64,
1576    p_order: usize,
1577    s_order: usize,
1578    k_dim: usize,
1579    coeffs: &DuchonPartialFractionCoeffs,
1580) -> Result<f64, BasisError> {
1581    let spectral_order = 2 * (p_order + s_order);
1582    if spectral_order <= k_dim {
1583        crate::bail_invalid_basis!(
1584            "Duchon hybrid diagonal is not finite when 2*(p+s) <= dimension; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1585        );
1586    }
1587
1588    let kappa = 1.0 / length_scale.max(1e-300);
1589    let mut pure = KahanSum::default();
1590    let mut log_part = KahanSum::default();
1591    for (m, &a_m) in coeffs.a.iter().enumerate().skip(1) {
1592        if a_m == 0.0 {
1593            continue;
1594        }
1595        let (block_pure, block_log) = duchon_polyharmonic_block_taylor_r2j(m, k_dim, 0);
1596        pure.add(a_m * block_pure);
1597        log_part.add(a_m * block_log);
1598    }
1599    for (n, &b_n) in coeffs.b.iter().enumerate().skip(1) {
1600        if b_n == 0.0 {
1601            continue;
1602        }
1603        let (block_pure, block_log) = duchon_matern_block_taylor_r2j(kappa, n, k_dim, 0);
1604        pure.add(b_n * block_pure);
1605        log_part.add(b_n * block_log);
1606    }
1607    let value = pure.sum();
1608    let log_value = log_part.sum();
1609    if log_value.abs() > 1e-8 * value.abs().max(1e-30) {
1610        crate::bail_invalid_basis!(
1611            "Duchon hybrid diagonal log terms did not cancel: log={log_value:.6e}, value={value:.6e}; p={p_order}, s={s_order}, d={k_dim}"
1612        );
1613    }
1614    if !value.is_finite() {
1615        crate::bail_invalid_basis!(
1616            "non-finite Duchon hybrid diagonal value for p={p_order}, s={s_order}, d={k_dim}"
1617        );
1618    }
1619    Ok(value)
1620}
1621
1622pub(crate) fn duchon_hybrid_kernel_near_collision_value(
1623    r: f64,
1624    length_scale: f64,
1625    p_order: usize,
1626    s_order: usize,
1627    k_dim: usize,
1628    coeffs: &DuchonPartialFractionCoeffs,
1629) -> Result<f64, BasisError> {
1630    let mut value =
1631        duchon_hybrid_kernel_collision_value(length_scale, p_order, s_order, k_dim, coeffs)?;
1632    if r == 0.0 {
1633        return Ok(value);
1634    }
1635
1636    // Radial Taylor expansion about the center collision:
1637    //
1638    //   phi(r) = phi(0)
1639    //          + phi''(0) r^2 / 2
1640    //          + phi''''(0) r^4 / 24
1641    //          + phi''''''(0) r^6 / 720 + ...
1642    //
1643    // Odd terms vanish for an isotropic radial kernel. A finite 2q-th
1644    // derivative at zero requires spectral smoothness 2(p+s) > d + 2q.
1645    // Terms whose collision derivative does not exist are omitted; this is
1646    // still strictly better than evaluating the raw partial-fraction sum at a
1647    // tiny nonzero radius, where large singular components cancel only after
1648    // losing many digits.
1649    let smoothness_order = 2 * (p_order + s_order);
1650    let r2 = r * r;
1651    if smoothness_order > k_dim + 2 {
1652        let (phi_rr, _, _) =
1653            duchonphi_rr_collision_psi_triplet(length_scale, p_order, s_order, k_dim, coeffs)?;
1654        value += 0.5 * phi_rr * r2;
1655    }
1656    if smoothness_order > k_dim + 4 {
1657        let phi_rrrr = duchon_phi_rrrr_collision(length_scale, p_order, s_order, k_dim, coeffs)?;
1658        value += (1.0 / 24.0) * phi_rrrr * r2 * r2;
1659    }
1660    if smoothness_order > k_dim + 6 {
1661        let phi_rrrrrr =
1662            duchon_phi_rrrrrr_collision(length_scale, p_order, s_order, k_dim, coeffs)?;
1663        value += (1.0 / 720.0) * phi_rrrrrr * r2 * r2 * r2;
1664    }
1665    if !value.is_finite() {
1666        crate::bail_invalid_basis!(
1667            "non-finite Duchon hybrid near-collision value at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1668        );
1669    }
1670    Ok(value)
1671}
1672
1673#[inline(always)]
1674pub(crate) fn stable_euclidean_norm<I>(components: I) -> f64
1675where
1676    I: IntoIterator<Item = f64>,
1677{
1678    let mut scale = 0.0_f64;
1679    let mut sumsq = 1.0_f64;
1680    let mut has_nonzero = false;
1681    for component in components {
1682        let abs = component.abs();
1683        if abs == 0.0 {
1684            continue;
1685        }
1686        if !abs.is_finite() {
1687            return f64::INFINITY;
1688        }
1689        if !has_nonzero {
1690            scale = abs;
1691            has_nonzero = true;
1692            continue;
1693        }
1694        if scale < abs {
1695            let ratio = scale / abs;
1696            sumsq = 1.0 + sumsq * ratio * ratio;
1697            scale = abs;
1698        } else {
1699            let ratio = abs / scale;
1700            sumsq += ratio * ratio;
1701        }
1702    }
1703    if has_nonzero {
1704        scale * sumsq.sqrt()
1705    } else {
1706        0.0
1707    }
1708}
1709
1710#[inline]
1711pub(crate) fn centered_aniso_log_scale_mean(eta: &[f64]) -> f64 {
1712    if eta.len() <= 1 {
1713        0.0
1714    } else {
1715        eta.iter().sum::<f64>() / eta.len() as f64
1716    }
1717}
1718
1719#[inline]
1720pub(crate) fn centered_aniso_log_scale(value: f64, mean: f64) -> f64 {
1721    // This bound exists solely to keep the downstream `.exp()` (axis scale and
1722    // metric weight) finite. `f64::clamp` leaves NaN as NaN, so a non-finite
1723    // contrast (e.g. an `inf − inf` from a degenerate anisotropy `eta`) would
1724    // slip through and poison the Gram matrix. Map any non-finite difference to
1725    // the saturating bound explicitly; finite inputs take the identical clamp.
1726    let centered = value - mean;
1727    if centered.is_finite() {
1728        centered.clamp(-50.0, 50.0)
1729    } else if centered > 0.0 {
1730        50.0
1731    } else {
1732        -50.0
1733    }
1734}
1735
1736#[inline]
1737pub(crate) fn aniso_axis_scale(value: f64, mean: f64) -> f64 {
1738    centered_aniso_log_scale(value, mean).exp()
1739}
1740
1741#[inline]
1742pub(crate) fn aniso_metric_weight(value: f64, mean: f64) -> f64 {
1743    (2.0 * centered_aniso_log_scale(value, mean)).exp()
1744}
1745
1746pub(crate) fn centered_aniso_metric_weights(eta: &[f64]) -> Vec<f64> {
1747    let mean = centered_aniso_log_scale_mean(eta);
1748    eta.iter()
1749        .map(|&value| aniso_metric_weight(value, mean))
1750        .collect()
1751}
1752
1753/// Compute anisotropic squared distance components and total distance.
1754///
1755/// This is the core of **geometric anisotropy**: a linear warp Λ = diag(κ_a)
1756/// turns ellipsoidal correlation contours into isotropic ones. Writing h = x − c,
1757/// z = Λh, the anisotropic distance is r = |z| = |Λh|.
1758///
1759/// We decompose Λ = κ · A where det(A) = 1, parameterized as
1760///   ψ_a = ψ̄ + η_a,   Σ η_a = 0
1761/// where ψ̄ is the global scale (existing scalar κ) and η_a are d−1 anisotropy
1762/// contrasts. This separates scale from shape and preserves the Duchon scaling
1763/// law φ(r;κ) = κ^δ H(κr) for the global part.
1764///
1765/// Given per-axis log-scales `eta`, the identifiable centered contrasts are
1766/// ψ_a = eta_a - mean(eta). The metric uses those contrasts so Σ_a ψ_a = 0
1767/// even when a caller passes an uncentered vector:
1768///
1769///   r = √( Σ_a exp(2·ψ_a) · (x_a - c_a)² )
1770///
1771/// Returns `(r, s_vec)` where `s_vec[a] = exp(2·ψ_a) · h_a²` is the
1772/// per-axis weighted squared displacement. These components are needed for
1773/// per-axis derivatives: `∂φ/∂ψ_a = q · s_a`.
1774///
1775/// The derivative chain through r gives:
1776///   ∇_ψ r      = s / r
1777///   ∇²_ψ r     = (2/r) Diag(s) − (1/r³) ss'
1778/// which is diagonal + rank-1, so Hessian-vector products are O(d).
1779#[inline]
1780pub(crate) fn aniso_distance_and_components(
1781    data_row: &[f64],
1782    center: &[f64],
1783    eta: &[f64],
1784) -> (f64, Vec<f64>) {
1785    assert_eq!(data_row.len(), center.len());
1786    assert_eq!(data_row.len(), eta.len());
1787    let d = data_row.len();
1788    let eta_mean = centered_aniso_log_scale_mean(eta);
1789    let mut s_vec = Vec::with_capacity(d);
1790    let mut scaled_components = Vec::with_capacity(d);
1791    for a in 0..d {
1792        let h_a = data_row[a] - center[a];
1793        // Clamp exp(2ψ) to avoid overflow/underflow: ψ in [-50, 50].
1794        let scale_a = aniso_axis_scale(eta[a], eta_mean);
1795        let scaled_h_a = scale_a * h_a;
1796        let s_a = scaled_h_a * scaled_h_a;
1797        scaled_components.push(scaled_h_a);
1798        s_vec.push(s_a);
1799    }
1800    (stable_euclidean_norm(scaled_components), s_vec)
1801}
1802
1803/// Compute anisotropic distance without returning per-axis components.
1804///
1805/// This is the lightweight version of [`aniso_distance_and_components`] for
1806/// call sites that only need the scalar distance `r`.
1807#[inline]
1808pub(crate) fn aniso_distance(data_row: &[f64], center: &[f64], eta: &[f64]) -> f64 {
1809    assert_eq!(data_row.len(), center.len());
1810    assert_eq!(data_row.len(), eta.len());
1811    let eta_mean = centered_aniso_log_scale_mean(eta);
1812    stable_euclidean_norm(
1813        (0..data_row.len()).map(|a| aniso_axis_scale(eta[a], eta_mean) * (data_row[a] - center[a])),
1814    )
1815}
1816
1817#[inline(always)]
1818pub(crate) fn euclidean_distance_rows(
1819    lhs: ArrayView2<'_, f64>,
1820    lhs_row: usize,
1821    rhs: ArrayView2<'_, f64>,
1822    rhs_row: usize,
1823) -> f64 {
1824    assert_eq!(lhs.ncols(), rhs.ncols());
1825    stable_euclidean_norm((0..lhs.ncols()).map(|axis| lhs[[lhs_row, axis]] - rhs[[rhs_row, axis]]))
1826}
1827
1828#[inline(always)]
1829pub(crate) fn aniso_axis_scales(eta: &[f64]) -> Vec<f64> {
1830    let eta_mean = centered_aniso_log_scale_mean(eta);
1831    eta.iter()
1832        .map(|&value| aniso_axis_scale(value, eta_mean))
1833        .collect()
1834}
1835
1836#[inline(always)]
1837pub(crate) fn aniso_distance_rows_with_scales(
1838    lhs: ArrayView2<'_, f64>,
1839    lhs_row: usize,
1840    rhs: ArrayView2<'_, f64>,
1841    rhs_row: usize,
1842    axis_scales: &[f64],
1843) -> f64 {
1844    assert_eq!(lhs.ncols(), rhs.ncols());
1845    assert_eq!(lhs.ncols(), axis_scales.len());
1846    stable_euclidean_norm(
1847        (0..lhs.ncols())
1848            .map(|axis| axis_scales[axis] * (lhs[[lhs_row, axis]] - rhs[[rhs_row, axis]])),
1849    )
1850}
1851
1852pub(crate) fn fill_symmetric_from_row_kernel<F>(
1853    matrix: &mut Array2<f64>,
1854    kernel: F,
1855) -> Result<(), BasisError>
1856where
1857    F: Fn(usize, usize) -> Result<f64, BasisError> + Sync,
1858{
1859    assert_eq!(matrix.nrows(), matrix.ncols());
1860    let k = matrix.nrows();
1861    // The kernels passed here are pure functions of the (symmetric) pairwise
1862    // center distance, so `kernel(i, j) == kernel(j, i)`. Evaluate only the
1863    // upper triangle (including the diagonal) in parallel — each row task
1864    // touches only its own `j >= i` cells, so the borrows stay disjoint — then
1865    // mirror into the lower triangle. This halves the (sqrt + special-function)
1866    // kernel evaluations relative to filling every cell independently, with no
1867    // change to the resulting matrix (still exactly symmetric).
1868    matrix
1869        .axis_iter_mut(Axis(0))
1870        .into_par_iter()
1871        .enumerate()
1872        .try_for_each(|(i, mut row)| {
1873            for j in i..k {
1874                row[j] = kernel(i, j)?;
1875            }
1876            Ok::<(), BasisError>(())
1877        })?;
1878    for i in 1..k {
1879        for j in 0..i {
1880            matrix[[i, j]] = matrix[[j, i]];
1881        }
1882    }
1883    Ok(())
1884}
1885
1886/// Return y-space points `y_{i,a} = exp(ψ_a) x_{i,a}` with
1887/// `ψ_a = η_a - mean(η)` so Euclidean pairwise
1888/// distances in y equal anisotropic kernel distances in x:
1889///   |y_i - y_j|² = Σ_a exp(2 ψ_a) (x_{i,a} - x_{j,a})² = aniso_distance²(x_i, x_j, η).
1890/// Use this before `pairwise_distance_bounds` whenever κ conditioning
1891/// bounds must match the kernel's actual metric (anisotropic case). For
1892/// isotropic terms, pass `None` and keep using the raw centers.
1893pub(crate) fn points_in_aniso_y_space(points: ArrayView2<'_, f64>, eta: &[f64]) -> Array2<f64> {
1894    assert_eq!(points.ncols(), eta.len());
1895    let mut y = points.to_owned();
1896    let eta_mean = centered_aniso_log_scale_mean(eta);
1897    let weights: Vec<f64> = eta.iter().map(|&e| aniso_axis_scale(e, eta_mean)).collect();
1898    for a in 0..eta.len() {
1899        let w_a = weights[a];
1900        y.column_mut(a).mapv_inplace(|v| v * w_a);
1901    }
1902    y
1903}
1904
1905/// Compute per-axis standard deviations of knot center coordinates.
1906///
1907/// Returns σ_a for each axis column of `centers`. Axes with zero variance
1908/// (constant column) get σ_a = 1.0. All values are clamped to [1e-6, 1e6].
1909pub fn knot_cloud_axis_scales(centers: ArrayView2<'_, f64>) -> Vec<f64> {
1910    let k = centers.nrows();
1911    let d = centers.ncols();
1912    if k < 2 || d == 0 {
1913        return vec![1.0; d];
1914    }
1915    let n = k as f64;
1916    let mut scales = Vec::with_capacity(d);
1917    for a in 0..d {
1918        let col = centers.column(a);
1919        let mean = col.sum() / n;
1920        let var = col.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / (n - 1.0);
1921        let sigma = var.sqrt();
1922        // If variance is zero (constant column), use 1.0 (no scaling).
1923        let sigma = if sigma < 1e-12 { 1.0 } else { sigma };
1924        scales.push(sigma.clamp(1e-6, 1e6));
1925    }
1926    scales
1927}
1928
1929/// Compute initial anisotropy contrasts η_a from knot center geometry.
1930///
1931/// Returns η_a = −ln(σ_a) + (1/d) Σ_b ln(σ_b), which satisfies Ση_a = 0
1932/// by construction. Axes with more spread get negative η_a (smaller κ_a,
1933/// longer correlation range), axes with less spread get positive η_a.
1934///
1935/// If d ≤ 1, returns an empty vector (anisotropy is meaningless for 1-D).
1936pub fn initial_aniso_contrasts(centers: ArrayView2<'_, f64>) -> Vec<f64> {
1937    let d = centers.ncols();
1938    if d <= 1 {
1939        return Vec::new();
1940    }
1941    let scales = knot_cloud_axis_scales(centers);
1942    let mean_neg_log: f64 = scales.iter().map(|&s| -s.ln()).sum::<f64>() / d as f64;
1943    // η_a = −ln(σ_a) + (1/d) Σ_b ln(σ_b)
1944    //     = −ln(σ_a) − mean(−ln(σ_b))
1945    //     = neg_log_scales[a] − mean(neg_log_scales)
1946    scales
1947        .iter()
1948        .map(|&scale| -scale.ln() - mean_neg_log)
1949        .collect()
1950}
1951
1952/// Pure forward transform of the supplied anisotropy log-scales: subtract the
1953/// mean (so Σ η = 0) and zero tiny residuals. `None` (or a 1-D problem, where
1954/// centering is a no-op) means *no* anisotropy.
1955///
1956/// This is a **continuous function of η with no hidden data dependence**: an
1957/// explicit all-zero vector centers to all-zero, i.e. the isotropic metric
1958/// (weights `exp(2·0) = 1`, Euclidean radius). It is therefore identical, as a
1959/// design, to the `None` path through `η = 0`, and is continuous across it —
1960/// `[1e-9, -1e-9]` and `[0, 0]` map to neighboring designs, not a jump.
1961///
1962/// The Matérn input-location jet/Hessian (`matern_metric_weights`, the public
1963/// `matern_input_location_first_jet`/`_hessian` FFI) and the `UserProvided`-center
1964/// forward design both apply *this* transform, so the jet differentiates exactly
1965/// the function the public design evaluates (#437), and an explicit isotropic
1966/// request reduces to the closed-form isotropic Matérn kernel rather than a
1967/// data-driven anisotropic one (#1042).
1968///
1969/// Auto-initialization of `η` from knot-cloud geometry is a *separate* concern
1970/// handled by [`auto_seed_aniso_contrasts`]; it is reserved for callers that
1971/// opt into data-derived geometry (the κ-optimizer's data-driven center
1972/// strategies and the pure-Duchon `scale_dims` path), selected by
1973/// [`resolve_matern_forward_aniso`].
1974pub(crate) fn centered_aniso_contrasts(aniso: Option<&[f64]>) -> Option<Vec<f64>> {
1975    match aniso {
1976        Some(v) if v.len() > 1 => Some(center_aniso_log_scales(v)),
1977        Some(v) => Some(v.to_vec()),
1978        None => None,
1979    }
1980}
1981
1982/// Auto-seed anisotropy contrasts from knot-cloud geometry for callers that use
1983/// an all-zero vector as the "initialize me" sentinel.
1984///
1985/// Used by (a) the pure-Duchon `scale_dims` path, where `η` is a FIXED,
1986/// geometry-derived basis parameter that is never enrolled as a REML hyper-axis
1987/// (see `spatial_term_supports_hyper_optimization`): "standardize the geometry,
1988/// then learn the smoothness"; and (b) the Matérn forward design when the term
1989/// uses a **data-driven** center strategy, i.e. the κ-optimizer's seeding
1990/// sentinel (the optimizer's analytic ψ-gradient is computed against the same
1991/// auto-seeded design, so the pair stays consistent). A non-zero (or absent)
1992/// vector is honored verbatim (centered, exactly like [`centered_aniso_contrasts`]);
1993/// only an *exactly* all-zero vector is replaced by `initial_aniso_contrasts(centers)`.
1994///
1995/// A `UserProvided`-center Matérn term does NOT use this — its geometry is fully
1996/// caller-specified, so an explicit all-zero η must be honored literally; folding
1997/// the geometry seed into that path made the public design discontinuous at
1998/// `η = 0` and hijacked explicit isotropic requests (#1042).
1999pub(crate) fn auto_seed_aniso_contrasts(
2000    centers: ArrayView2<'_, f64>,
2001    aniso: Option<&[f64]>,
2002) -> Option<Vec<f64>> {
2003    let eta = match aniso {
2004        Some(v) if v.len() > 1 => v,
2005        Some(v) => return Some(v.to_vec()),
2006        None => return None,
2007    };
2008    let all_zero = eta.iter().all(|&e| e == 0.0);
2009    if !all_zero {
2010        return Some(center_aniso_log_scales(eta));
2011    }
2012    let contrasts = initial_aniso_contrasts(centers);
2013    if contrasts.is_empty() {
2014        Some(center_aniso_log_scales(eta))
2015    } else {
2016        Some(center_aniso_log_scales(&contrasts))
2017    }
2018}
2019
2020fn center_aniso_log_scales(eta: &[f64]) -> Vec<f64> {
2021    if eta.len() <= 1 {
2022        return eta.to_vec();
2023    }
2024    let mean = eta.iter().sum::<f64>() / eta.len() as f64;
2025    eta.iter()
2026        .map(|&v| {
2027            let centered = v - mean;
2028            if centered.abs() <= 1e-15 {
2029                0.0
2030            } else {
2031                centered
2032            }
2033        })
2034        .collect()
2035}
2036
2037/// How the Matérn forward design build interprets an *exactly all-zero*
2038/// `aniso_log_scales` vector.
2039#[derive(Clone, Copy, Debug, PartialEq, Eq)]
2040pub enum AnisoSeedMode {
2041    /// All-zero `η` is the κ-optimizer / `scale_dims` seeding sentinel: replace
2042    /// it with geometry-derived contrasts from the knot cloud
2043    /// ([`auto_seed_aniso_contrasts`]). This is the default for every internal
2044    /// build entry; the optimizer's analytic ψ-gradient is computed against the
2045    /// same auto-seeded design, so value/gradient stay consistent. Note that by
2046    /// the time the κ-optimizer rebuilds a frozen design the center strategy has
2047    /// usually been resolved to `UserProvided`, so center provenance cannot be
2048    /// used to distinguish this from a genuine literal request — the mode must
2049    /// be carried explicitly.
2050    AutoSeedFromGeometry,
2051    /// All-zero `η` is an explicit isotropic request and is honored literally
2052    /// ([`centered_aniso_contrasts`]): the design reduces to the closed-form
2053    /// isotropic Matérn and varies continuously through `η = 0`. The public
2054    /// `matern_basis` FFI (and its input-location jet/Hessian) selects this so a
2055    /// caller's explicit isotropic request is not hijacked into a data-driven
2056    /// anisotropic kernel (#1042).
2057    Literal,
2058}
2059
2060/// Resolve the anisotropy contrasts the Matérn forward design build applies,
2061/// dispatching on the explicit [`AnisoSeedMode`].
2062pub(crate) fn resolve_matern_forward_aniso(
2063    mode: AnisoSeedMode,
2064    centers: ArrayView2<'_, f64>,
2065    aniso: Option<&[f64]>,
2066) -> Option<Vec<f64>> {
2067    match mode {
2068        AnisoSeedMode::Literal => centered_aniso_contrasts(aniso),
2069        AnisoSeedMode::AutoSeedFromGeometry => auto_seed_aniso_contrasts(centers, aniso),
2070    }
2071}
2072
2073pub(crate) fn pairwise_distance_bounds(points: ArrayView2<'_, f64>) -> Option<(f64, f64)> {
2074    let n = points.nrows();
2075    let d = points.ncols();
2076    if n < 2 || d == 0 {
2077        return None;
2078    }
2079    let mut r_min = f64::INFINITY;
2080    let mut r_max = 0.0_f64;
2081    for i in 0..n {
2082        for j in (i + 1)..n {
2083            let r = stable_euclidean_norm((0..d).map(|c| points[[i, c]] - points[[j, c]]));
2084            if r.is_finite() && r > 0.0 {
2085                r_min = r_min.min(r);
2086                r_max = r_max.max(r);
2087            }
2088        }
2089    }
2090    if r_min.is_finite() && r_max.is_finite() && r_min > 0.0 && r_max > 0.0 {
2091        Some((r_min, r_max))
2092    } else {
2093        None
2094    }
2095}
2096
2097/// Capped-sample pairwise distance bounds for large point clouds.
2098///
2099/// Returns `(r_min_hat, r_max_hat)` such that:
2100/// - `r_max_hat <= true r_max`  (pairwise max over a sub-sample is monotone
2101///    in the sample, so the sampled max underestimates the true max).
2102/// - `r_min_hat >= true r_min`  (pairwise min over a sub-sample can only
2103///    exclude some pairs, so the sampled min overestimates the true min).
2104///
2105/// Both approximations are conservative for κ-bound derivation:
2106///   kappa_lo = 1e-2 / r_max_hat  >=  1e-2 / true r_max  (wider window, low κ)
2107///   kappa_hi = 1e2  / r_min_hat  <=  1e2  / true r_min  (tighter window, high κ)
2108/// so no feasible κ that the exact bound would include is excluded by the
2109/// approximation — it can only slightly shrink the high-κ tail, which is
2110/// exactly the regime (κ → ∞ ⇒ degenerate kernel) that we want the outer
2111/// optimizer to avoid anyway.
2112///
2113/// Sampling is deterministic stride (points indexed 0, stride, 2·stride, …).
2114/// For a cap of `K = 1024` and n up to ~10⁹ this yields O(K²·d) work per
2115/// call — a few hundred μs. For n < K the exact pairwise is used.
2116pub(crate) fn pairwise_distance_bounds_sampled(points: ArrayView2<'_, f64>) -> Option<(f64, f64)> {
2117    const K_CAP: usize = 1024;
2118    let n = points.nrows();
2119    let d = points.ncols();
2120    if n < 2 || d == 0 {
2121        return None;
2122    }
2123    if n <= K_CAP {
2124        return pairwise_distance_bounds(points);
2125    }
2126    // Deterministic stride sampling: pick K_CAP evenly spaced indices.
2127    // This preserves any spatial stratification already present in the
2128    // data ordering (large-scale data is typically in insertion order, not
2129    // spatially stratified, so stride sampling is effectively uniform).
2130    let stride = n / K_CAP;
2131    let k = K_CAP; // exactly K_CAP samples by construction (stride rounds down)
2132    let mut r_min = f64::INFINITY;
2133    let mut r_max = 0.0_f64;
2134    for i_idx in 0..k {
2135        let i = i_idx * stride;
2136        for j_idx in (i_idx + 1)..k {
2137            let j = j_idx * stride;
2138            let r = stable_euclidean_norm((0..d).map(|c| points[[i, c]] - points[[j, c]]));
2139            if r.is_finite() && r > 0.0 {
2140                r_min = r_min.min(r);
2141                r_max = r_max.max(r);
2142            }
2143        }
2144    }
2145    if r_min.is_finite() && r_max.is_finite() && r_min > 0.0 && r_max > 0.0 {
2146        Some((r_min, r_max))
2147    } else {
2148        None
2149    }
2150}
2151
2152#[cfg(test)]
2153mod duchon_hybrid_psd_tests {
2154    use super::*;
2155    use faer::Side;
2156    use gam_linalg::faer_ndarray::FaerEigh;
2157
2158    /// Deterministic, well-separated centers on `[-1, 1]^d` (a Halton-style
2159    /// low-discrepancy lattice over the radical-inverse base sequence). Mirrors
2160    /// the `4*d` random centers the Python fixture
2161    /// (`tests/test_python_api.py`'s high-dimensional hybrid Duchon penalty PSD
2162    /// check) draws, but without an RNG so the regression is byte-stable.
2163    fn fixture_centers(d: usize, n: usize) -> Array2<f64> {
2164        const BASES: [u64; 24] = [
2165            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
2166            89,
2167        ];
2168        let mut centers = Array2::<f64>::zeros((n, d));
2169        for i in 0..n {
2170            for axis in 0..d {
2171                let base = BASES[axis % BASES.len()];
2172                // Van der Corput radical inverse of (i + 1) in `base`, mapped to
2173                // [-1, 1]. Different axes use different primes, so the cloud is
2174                // affinely full-rank and spans the linear null space.
2175                let mut f = 1.0_f64;
2176                let mut idx = (i + 1) as u64;
2177                let mut value = 0.0_f64;
2178                while idx > 0 {
2179                    f /= base as f64;
2180                    value += f * (idx % base) as f64;
2181                    idx /= base;
2182                }
2183                centers[[i, axis]] = 2.0 * value - 1.0;
2184            }
2185        }
2186        centers
2187    }
2188
2189    /// Smallest symmetric eigenvalue of `matrix` (the matrix is symmetrized
2190    /// first; the constrained Duchon penalty is symmetric by construction).
2191    fn lambda_min(matrix: &Array2<f64>) -> f64 {
2192        let sym = symmetrize_penalty(matrix);
2193        let (evals, _) = FaerEigh::eigh(&sym, Side::Lower).expect("symmetric eigendecomposition");
2194        evals.iter().copied().fold(f64::INFINITY, f64::min)
2195    }
2196
2197    /// gam#1424: the (d=16, m=2, s=7) hybrid Duchon–Matérn fixture used to lose
2198    /// positive definiteness through catastrophic cancellation in the
2199    /// partial-fraction kernel expansion — the constrained, post-normalization
2200    /// penalty had λ_min ≈ −0.26442 even though the kernel's spectral density
2201    /// `ρ^{-2p}(κ²+ρ²)^{-s}` is nonnegative (so the true penalty is PSD). The
2202    /// kernel now routes through the cancellation-free single-integral form, so
2203    /// the spectrum is numerically PSD. This mirrors the production penalty path
2204    /// `duchon_constrained_bending_penalty` → `normalize_penalty`.
2205    #[test]
2206    fn high_dim_hybrid_penalty_is_numerically_psd_1424() {
2207        let d = 16usize;
2208        // m=2 ⇒ Linear null space. The cubic default spectral power is the
2209        // fractional (d-1)/2 = 7.5; the production hybrid config resolves it to
2210        // the integer spectral order the closed-form kernel consumes, s = 7
2211        // (`duchon_constrained_bending_penalty` itself takes the integer view via
2212        // `duchon_power_to_usize`, and the reroute predicate needs s ≥ 1). This is
2213        // the (d=16, m=2, s=7) fixture from the issue and the Python
2214        // `duchon_function_norm_penalty` PSD test.
2215        let (nullspace_order, default_power) = duchon_cubic_default(d);
2216        assert!(matches!(nullspace_order, DuchonNullspaceOrder::Linear));
2217        assert!(
2218            (default_power - 7.5).abs() < 1e-12,
2219            "cubic-default power for d=16 is 7.5"
2220        );
2221        let power = 7.0_f64;
2222        assert_eq!(duchon_power_to_usize(power), 7);
2223        // The reroute must engage for this fixture (s = 7 ≥ 1, 2p = 4 < d = 16).
2224        assert!(duchon_hybrid_stable_integral_applies(
2225            duchon_p_from_nullspace_order(nullspace_order),
2226            duchon_power_to_usize(power),
2227            d,
2228        ));
2229        let length_scale = Some(1.0_f64);
2230        let centers = fixture_centers(d, 4 * d);
2231
2232        let mut cache = BasisCacheContext::default();
2233        let z = kernel_constraint_nullspace(centers.view(), nullspace_order, &mut cache)
2234            .expect("constraint null space");
2235
2236        let omega = duchon_constrained_bending_penalty(
2237            centers.view(),
2238            length_scale,
2239            power,
2240            nullspace_order,
2241            None,
2242            &z,
2243        )
2244        .expect("constrained bending penalty assembles for the hybrid fixture");
2245        let (penalty, _scale) = normalize_penalty(&omega);
2246
2247        let lam_min = lambda_min(&penalty);
2248        assert!(
2249            lam_min >= -1e-10,
2250            "gam#1424: (d=16, m=2, s=7) hybrid penalty is not numerically PSD: \
2251             λ_min={lam_min:.6e} (was ≈ −0.26442 with the cancellation-prone \
2252             partial-fraction kernel)"
2253        );
2254    }
2255
2256    /// gam#1604: independent closed form for the hybrid-kernel origin value
2257    /// `φ(0) = F⁻¹[ρ^{-2p}(κ²+ρ²)^{-s}](0)` in `d` dimensions, derived by
2258    /// Schwinger-parametrizing both rational factors and evaluating the radial
2259    /// inverse-FT integral at `r = 0`:
2260    ///
2261    ///   φ(0) = (4π)^{-d/2} / Γ(s) · Γ(b) · κ^{-2b} · Γ(d/2 − p) / Γ(d/2),
2262    ///   b = p + s − d/2.
2263    ///
2264    /// Finite whenever `2(p+s) > d` and (for the Γ(d/2 − p) factor to avoid a
2265    /// pole) `d` is odd or `2p < d`. This reuses none of the Taylor-coefficient
2266    /// machinery under test, so it is a true oracle for the collision diagonal.
2267    fn phi0_closed_form(p: usize, s: usize, d: usize, kappa: f64) -> f64 {
2268        let half_d = 0.5 * d as f64;
2269        let b = p as f64 + s as f64 - half_d;
2270        (4.0 * std::f64::consts::PI).powf(-half_d) / gamma_lanczos(s as f64)
2271            * gamma_lanczos(b)
2272            * kappa.powf(-2.0 * b)
2273            * gamma_lanczos(half_d - p as f64)
2274            / gamma_lanczos(half_d)
2275    }
2276
2277    /// gam#1604 — the collision (r = 0) diagonal of the hybrid Duchon–Matérn
2278    /// kernel must equal the independent closed form above. The half-integer-ν
2279    /// Taylor coefficients that feed `duchon_hybrid_kernel_collision_value`
2280    /// previously miscounted the K_{l+½} polynomial degree (`l = 2|ν| − 1`
2281    /// instead of `|ν| − ½`), zeroing the r⁰ term of every |ν| ≥ 3/2 block and
2282    /// silently dropping their contribution to φ(0).
2283    #[test]
2284    fn hybrid_collision_diagonal_matches_closed_form_1604() {
2285        // Odd dimensions exercise the half-integer-ν path. For each, sweep p, s
2286        // (with 2(p+s) > d) and κ. d = 1, n ≥ 2 ⇒ ν ≥ 3/2 is the regressed case.
2287        for &d in &[1usize, 3, 5] {
2288            for &p in &[1usize, 2, 3] {
2289                for &s in &[1usize, 2, 3, 4] {
2290                    if 2 * (p + s) <= d {
2291                        continue;
2292                    }
2293                    for &kappa in &[0.5f64, 1.0, 2.5] {
2294                        let coeffs = duchon_partial_fraction_coeffs(p, s, kappa);
2295                        let got = duchon_hybrid_kernel_collision_value(
2296                            1.0 / kappa,
2297                            p,
2298                            s,
2299                            d,
2300                            &coeffs,
2301                        )
2302                        .expect("collision diagonal");
2303                        let want = phi0_closed_form(p, s, d, kappa);
2304                        let rel = (got - want).abs() / want.abs().max(1e-300);
2305                        assert!(
2306                            rel < 1e-10,
2307                            "φ(0) mismatch d={d} p={p} s={s} κ={kappa}: got {got:.12e}, want {want:.12e} (rel {rel:.2e})"
2308                        );
2309                    }
2310                }
2311            }
2312        }
2313    }
2314
2315    /// gam#1604 — the near-collision Taylor branch must be continuous with the
2316    /// direct partial-fraction sum: assembling φ(r) from φ(0), φ″(0), φ⁗(0),
2317    /// φ⁽⁶⁾(0) (all built from the same half-integer-ν Taylor coefficients) must
2318    /// match the cancellation-free direct block sum at a small radius where both
2319    /// are individually accurate. This exercises the j ≥ 1 coefficients (the
2320    /// diagonal test only pins j = 0).
2321    #[test]
2322    fn hybrid_near_collision_continuous_with_direct_1604() {
2323        for &d in &[1usize, 3] {
2324            for &p in &[1usize, 2] {
2325                for &s in &[2usize, 3] {
2326                    if 2 * (p + s) <= d + 6 {
2327                        // Need φ⁽⁶⁾(0) to exist for the full 6th-order Taylor.
2328                        continue;
2329                    }
2330                    for &kappa in &[0.5f64, 1.0, 2.0] {
2331                        let length_scale = 1.0 / kappa;
2332                        let coeffs = duchon_partial_fraction_coeffs(p, s, kappa);
2333                        // r small enough that the truncated 6th-order Taylor is
2334                        // accurate to ~r⁸, yet large enough that the direct block
2335                        // sum has not lost precision (d = 1/3, moderate κ).
2336                        let r = 0.02 * length_scale;
2337                        let taylor = duchon_hybrid_kernel_near_collision_value(
2338                            r,
2339                            length_scale,
2340                            p,
2341                            s,
2342                            d,
2343                            &coeffs,
2344                        )
2345                        .expect("near-collision value");
2346                        // Direct partial-fraction sum (real Bessel-K, no Taylor).
2347                        let mut direct = 0.0f64;
2348                        for (m, &a_m) in coeffs.a.iter().enumerate().skip(1) {
2349                            if a_m != 0.0 {
2350                                direct += a_m * polyharmonic_kernel(r, m as f64, d);
2351                            }
2352                        }
2353                        for (n, &b_n) in coeffs.b.iter().enumerate().skip(1) {
2354                            if b_n != 0.0 {
2355                                direct +=
2356                                    b_n * duchon_matern_block(r, kappa, n, d).expect("matern block");
2357                            }
2358                        }
2359                        let rel = (taylor - direct).abs() / direct.abs().max(1e-300);
2360                        assert!(
2361                            rel < 1e-9,
2362                            "near-collision vs direct mismatch d={d} p={p} s={s} κ={kappa} r={r}: \
2363                             taylor {taylor:.12e}, direct {direct:.12e} (rel {rel:.2e})"
2364                        );
2365                    }
2366                }
2367            }
2368        }
2369    }
2370
2371    /// gam#1604 — the production constrained Duchon penalty `Ω_c = α²·ZᵀK_CC Z`
2372    /// for a `d = 1` hybrid smooth with power ≥ 2 must be numerically PSD across
2373    /// realistic length scales. Before the Taylor-degree fix the corrupted
2374    /// diagonal made `Ω_c ≈ Ω_true − δ·I` (δ = the dropped diagonal mass),
2375    /// giving λ_min ≈ −δ < 0 at *every* length scale — the issue's report.
2376    #[test]
2377    fn d1_hybrid_penalty_is_psd_1604() {
2378        let d = 1usize;
2379        let nullspace_order = DuchonNullspaceOrder::Linear; // p = 2
2380        let centers = fixture_centers(d, 12);
2381        let mut cache = BasisCacheContext::default();
2382        let z = kernel_constraint_nullspace(centers.view(), nullspace_order, &mut cache)
2383            .expect("constraint null space");
2384        for &power in &[2.0f64, 3.0] {
2385            for &length_scale in &[0.5f64, 1.0, 10.0, 100.0] {
2386                let omega = duchon_constrained_bending_penalty(
2387                    centers.view(),
2388                    Some(length_scale),
2389                    power,
2390                    nullspace_order,
2391                    None,
2392                    &z,
2393                )
2394                .unwrap_or_else(|e| {
2395                    panic!("d=1 p=2 s={power} ls={length_scale} penalty rejected: {e}")
2396                });
2397                let (penalty, _scale) = normalize_penalty(&omega);
2398                let lam_min = lambda_min(&penalty);
2399                assert!(
2400                    lam_min >= -1e-9,
2401                    "d=1 p=2 s={power} ls={length_scale}: λ_min={lam_min:.6e} (not PSD)"
2402                );
2403            }
2404        }
2405    }
2406
2407    /// No-regression guard: a well-conditioned low-dimensional fixture must keep
2408    /// the exact kernel VALUES the partial-fraction path produced before the
2409    /// gam#1424 fix. For d=2 the stable-integral reroute does not apply
2410    /// (`2p=4 ≥ d=2`), so `duchon_matern_kernel_general_from_distance` still runs
2411    /// the original sum verbatim; pinning it against an independent direct
2412    /// evaluation of the same partial-fraction blocks proves the production
2413    /// routing is unchanged for low `d`.
2414    #[test]
2415    fn low_dim_hybrid_kernel_values_unchanged_1424() {
2416        let d = 2usize;
2417        let p_order = 2usize; // Linear null space (m=2)
2418        let s_order = 2usize;
2419        let kappa = 1.0_f64;
2420        let length_scale = Some(1.0_f64);
2421        // The d=2 case is NOT rerouted to the stable integral.
2422        assert!(!duchon_hybrid_stable_integral_applies(p_order, s_order, d));
2423        let coeffs = duchon_partial_fraction_coeffs(p_order, s_order, kappa);
2424
2425        for &r in &[0.25_f64, 0.75, 1.5] {
2426            // Independent reference: the raw partial-fraction sum
2427            // Σ a_m·r^{2m-d}(·log) + Σ b_n·matern_block, identical in form to the
2428            // production direct-sum branch but assembled here from scratch.
2429            let mut reference = 0.0_f64;
2430            for (m, &coeff) in coeffs.a.iter().enumerate().skip(1) {
2431                if coeff != 0.0 {
2432                    reference += coeff * polyharmonic_kernel(r, m as f64, d);
2433                }
2434            }
2435            for (n, &coeff) in coeffs.b.iter().enumerate().skip(1) {
2436                if coeff != 0.0 {
2437                    reference += coeff * duchon_matern_block(r, kappa, n, d).expect("matern block");
2438                }
2439            }
2440
2441            let got = duchon_matern_kernel_general_from_distance(
2442                r,
2443                length_scale,
2444                p_order,
2445                s_order,
2446                d,
2447                Some(&coeffs),
2448            )
2449            .expect("low-d hybrid kernel value");
2450            assert!(
2451                (got - reference).abs() <= 1e-10,
2452                "low-d hybrid kernel value regressed at r={r}: got {got:.15e}, reference {reference:.15e}"
2453            );
2454        }
2455    }
2456}