Skip to main content

gam_terms/basis/
duchon_kernel_math.rs

1use super::*;
2
3pub fn build_duchon_collocation_operator_matrices(
4    centers: ArrayView2<'_, f64>,
5    collocationweights: Option<ArrayView1<'_, f64>>,
6    length_scale: Option<f64>,
7    power: f64,
8    nullspace_order: DuchonNullspaceOrder,
9    aniso_log_scales: Option<&[f64]>,
10    identifiability_transform: Option<ArrayView2<'_, f64>>,
11    max_operator_derivative_order: usize,
12) -> Result<CollocationOperatorMatrices, BasisError> {
13    let mut workspace = BasisWorkspace::default();
14    build_duchon_collocation_operator_matriceswithworkspace(
15        centers,
16        centers,
17        collocationweights,
18        length_scale,
19        power,
20        nullspace_order,
21        aniso_log_scales,
22        identifiability_transform,
23        max_operator_derivative_order,
24        &mut workspace,
25    )
26}
27
28pub fn build_duchon_operator_penalty_matrices(
29    centers: ArrayView2<'_, f64>,
30    collocationweights: Option<ArrayView1<'_, f64>>,
31    length_scale: Option<f64>,
32    power: f64,
33    nullspace_order: DuchonNullspaceOrder,
34    aniso_log_scales: Option<&[f64]>,
35    identifiability_transform: Option<ArrayView2<'_, f64>>,
36) -> Result<DuchonOperatorPenaltyMatrices, BasisError> {
37    let ops = build_duchon_collocation_operator_matrices(
38        centers,
39        collocationweights,
40        length_scale,
41        power,
42        nullspace_order,
43        aniso_log_scales,
44        identifiability_transform,
45        2,
46    )?;
47    let (mass, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d0)));
48    let (tension, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d1)));
49    let (stiffness, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d2)));
50    Ok(DuchonOperatorPenaltyMatrices {
51        mass,
52        tension,
53        stiffness,
54    })
55}
56
57pub fn build_thin_plate_penalty_matrix(
58    centers: ArrayView2<'_, f64>,
59    length_scale: f64,
60) -> Result<ThinPlatePenaltyMatrix, BasisError> {
61    let mut workspace = BasisWorkspace::default();
62    let kernel_transform = thin_plate_kernel_constraint_nullspace(centers, &mut workspace.cache)?;
63    let (penalty, _) =
64        build_thin_plate_penalty_matrices(centers, length_scale, &kernel_transform, false)?;
65    let (penalty, _) = normalize_penalty(&penalty);
66    Ok(ThinPlatePenaltyMatrix { penalty })
67}
68
69pub fn build_duchon_collocation_operator_matriceswithworkspace(
70    centers: ArrayView2<'_, f64>,
71    collocation_points: ArrayView2<'_, f64>,
72    collocationweights: Option<ArrayView1<'_, f64>>,
73    length_scale: Option<f64>,
74    power: f64,
75    nullspace_order: DuchonNullspaceOrder,
76    aniso_log_scales: Option<&[f64]>,
77    identifiability_transform: Option<ArrayView2<'_, f64>>,
78    max_operator_derivative_order: usize,
79    workspace: &mut BasisWorkspace,
80) -> Result<CollocationOperatorMatrices, BasisError> {
81    // The operator design rows are the COLLOCATION points (a density-blind,
82    // space-filling sample of the data support); the columns are the `k` basis
83    // CENTERS. Decoupling them is what makes the operator penalty a faithful
84    // quadrature of `∫‖Dᵠf‖²` (collocating at the `k` centers themselves — the
85    // old `collocation_points == centers` special case — under-samples a
86    // `k`-bump basis and is what made these penalties explode).
87    let nullspace_order = duchon_effective_nullspace_order(centers, nullspace_order);
88    let p_order = duchon_p_from_nullspace_order(nullspace_order);
89    let s_order: f64 = power;
90    let p_colloc = collocation_points.nrows();
91    let n_basis = centers.nrows();
92    let dim = centers.ncols();
93    if collocation_points.ncols() != dim {
94        crate::bail_dim_basis!(
95            "collocation points dim {} != centers dim {dim}",
96            collocation_points.ncols()
97        );
98    }
99    validate_duchon_collocation_orders(
100        length_scale,
101        p_order,
102        s_order,
103        dim,
104        max_operator_derivative_order,
105    )?;
106    if let Some(eta) = aniso_log_scales
107        && eta.len() != dim
108    {
109        crate::bail_dim_basis!(
110            "Duchon anisotropy dimension mismatch: got {}, expected {dim}",
111            eta.len()
112        );
113    }
114    // Partial-fraction expansion only runs in the hybrid Matérn branch
115    // (`length_scale = Some`). The scale-free path (`length_scale = None`)
116    // skips it entirely and is fractional-clean down to the Riesz kernel.
117    let coeffs = length_scale.map(|scale| {
118        let s_int = duchon_power_to_usize(s_order);
119        duchon_partial_fraction_coeffs(p_order, s_int, 1.0 / scale.max(1e-300))
120    });
121    let metric_weights: Option<Vec<f64>> = aniso_log_scales.map(centered_aniso_metric_weights);
122    let row_scales = if let Some(w) = collocationweights {
123        if w.len() != p_colloc {
124            crate::bail_dim_basis!(
125                "collocation weight length mismatch: got {}, expected {p_colloc}",
126                w.len()
127            );
128        }
129        let mut out = Vec::with_capacity(p_colloc);
130        for &wk in w {
131            if !wk.is_finite() || wk < 0.0 {
132                crate::bail_invalid_basis!(
133                    "collocation weights must be finite and non-negative; got {wk}"
134                );
135            }
136            out.push(wk.sqrt());
137        }
138        out
139    } else {
140        vec![1.0; p_colloc]
141    };
142    let z = kernel_constraint_nullspace(centers, nullspace_order, &mut workspace.cache)?;
143    // D0/D1/D2 rows = collocation points (`p_colloc`), columns = basis centers
144    // (`n_basis`). Gradients/Hessians are taken w.r.t. the EVALUATION point
145    // (the collocation row), so `delta = collocation - center`. No symmetry: the
146    // two point sets differ in general.
147    // Skip the costly higher-derivative designs the caller doesn't need: mass
148    // (D0) + tension (D1) build with `max_op = 1`, so the `O(d²)`-row Hessian
149    // (D2) is never allocated or filled — decisive in high `d`.
150    let build_d1 = max_operator_derivative_order >= 1;
151    let build_d2 = max_operator_derivative_order >= 2;
152    let mut d0_raw = Array2::<f64>::zeros((p_colloc, n_basis));
153    let mut d1_raw = Array2::<f64>::zeros((if build_d1 { p_colloc * dim } else { 0 }, n_basis));
154    let mut d2_raw =
155        Array2::<f64>::zeros((if build_d2 { p_colloc * dim * dim } else { 0 }, n_basis));
156    const R_EPS: f64 = 1e-10;
157    for i in 0..p_colloc {
158        let scale_i = row_scales[i];
159        for j in 0..n_basis {
160            let r = if let Some(eta) = aniso_log_scales {
161                let row_i: Vec<f64> = (0..dim).map(|a| collocation_points[[i, a]]).collect();
162                let row_j: Vec<f64> = (0..dim).map(|a| centers[[j, a]]).collect();
163                aniso_distance(&row_i, &row_j, eta)
164            } else {
165                stable_euclidean_norm(
166                    (0..dim).map(|axis| collocation_points[[i, axis]] - centers[[j, axis]]),
167                )
168            };
169            // Floor coincident collocation/center pairs off the kernel's origin
170            // singularity: a farthest-point sample can land exactly on a center.
171            // The gradient/Hessian limits at r→0 are the zeros the `r > R_EPS`
172            // guards below already produce, so flooring only avoids the log-case
173            // `r²·log r` second-derivative blow-up at exact r=0.
174            let r = r.max(R_EPS);
175            let (phi, q, t) = if let (Some(length_scale), Some(coeffs)) =
176                (length_scale, coeffs.as_ref())
177            {
178                let jets =
179                    duchon_radial_jets(r, length_scale, p_order, s_order as usize, dim, coeffs)?;
180                (jets.phi, jets.q, jets.t)
181            } else {
182                let (phi, phi_r, phi_rr) = duchon_kernel_radial_triplet(
183                    r,
184                    length_scale,
185                    p_order,
186                    s_order,
187                    dim,
188                    coeffs.as_ref(),
189                )?;
190                let q = if r > R_EPS { phi_r / r } else { phi_rr };
191                let t = if r > R_EPS {
192                    (phi_rr - q) / (r * r)
193                } else {
194                    0.0
195                };
196                (phi, q, t)
197            };
198            if !phi.is_finite() || !q.is_finite() || !t.is_finite() {
199                crate::bail_invalid_basis!(
200                    "non-finite Duchon collocation operator derivative at (colloc {i}, center {j}), r={r}"
201                );
202            }
203            d0_raw[[i, j]] = scale_i * phi;
204            if build_d2 {
205                for axis_a in 0..dim {
206                    let h_a = collocation_points[[i, axis_a]] - centers[[j, axis_a]];
207                    let w_a = metric_weights
208                        .as_ref()
209                        .map(|weights| weights[axis_a])
210                        .unwrap_or(1.0);
211                    for axis_b in 0..dim {
212                        let h_b = collocation_points[[i, axis_b]] - centers[[j, axis_b]];
213                        let w_b = metric_weights
214                            .as_ref()
215                            .map(|weights| weights[axis_b])
216                            .unwrap_or(1.0);
217                        let diagonal = if axis_a == axis_b { q * w_a } else { 0.0 };
218                        let mixed = if r > R_EPS {
219                            t * w_a * h_a * w_b * h_b
220                        } else {
221                            0.0
222                        };
223                        let value = diagonal + mixed;
224                        let row_i = (i * dim + axis_a) * dim + axis_b;
225                        d2_raw[[row_i, j]] = scale_i * value;
226                    }
227                }
228            }
229            if build_d1 && r > R_EPS {
230                for axis in 0..dim {
231                    let delta = collocation_points[[i, axis]] - centers[[j, axis]];
232                    let axis_scale = metric_weights
233                        .as_ref()
234                        .map(|weights| weights[axis])
235                        .unwrap_or(1.0);
236                    d1_raw[[i * dim + axis, j]] = scale_i * q * axis_scale * delta;
237                }
238            }
239        }
240    }
241    let d0_kernel = fast_ab(&d0_raw, &z);
242    let poly = polynomial_block_from_order(centers, nullspace_order);
243    let poly_collocation = polynomial_block_from_order(collocation_points, nullspace_order);
244    let poly_d1 = if build_d1 {
245        polynomial_derivative_block(collocation_points, nullspace_order, 1)
246    } else {
247        Array2::<f64>::zeros((0, poly.ncols()))
248    };
249    let poly_d2 = if build_d2 {
250        polynomial_derivative_block(collocation_points, nullspace_order, 2)
251    } else {
252        Array2::<f64>::zeros((0, poly.ncols()))
253    };
254    let kernel_cols = d0_kernel.ncols();
255    let poly_cols = poly.ncols();
256    let total_cols = kernel_cols + poly_cols;
257    // The operator matrices act on the SAME coefficient basis as the emitted
258    // design: constrained radial columns followed by explicit polynomial
259    // null-space columns.  The lower-order Hilbert-scale penalties are function
260    // penalties, not just radial-kernel penalties, so the polynomial block must
261    // be evaluated/differentiated at the collocation sites too: D0 sees the
262    // polynomial value, D1 its gradient, and D2 its Hessian. Orders the caller
263    // skipped stay empty (0 rows).
264    let mut d0 = Array2::<f64>::zeros((p_colloc, total_cols));
265    d0.slice_mut(s![.., 0..kernel_cols]).assign(&d0_kernel);
266    d0.slice_mut(s![.., kernel_cols..total_cols])
267        .assign(&poly_collocation);
268    let mut d1 = Array2::<f64>::zeros((if build_d1 { p_colloc * dim } else { 0 }, total_cols));
269    if build_d1 {
270        d1.slice_mut(s![.., 0..kernel_cols])
271            .assign(&fast_ab(&d1_raw, &z));
272        d1.slice_mut(s![.., kernel_cols..total_cols])
273            .assign(&poly_d1);
274    }
275    let mut d2 =
276        Array2::<f64>::zeros((if build_d2 { p_colloc * dim * dim } else { 0 }, total_cols));
277    if build_d2 {
278        d2.slice_mut(s![.., 0..kernel_cols])
279            .assign(&fast_ab(&d2_raw, &z));
280        d2.slice_mut(s![.., kernel_cols..total_cols])
281            .assign(&poly_d2);
282    }
283    if let Some(z) = identifiability_transform {
284        let z = z.to_owned();
285        d0 = fast_ab(&d0, &z);
286        d1 = fast_ab(&d1, &z);
287        d2 = fast_ab(&d2, &z);
288    }
289    Ok(CollocationOperatorMatrices {
290        d0,
291        d1,
292        d2,
293        collocation_points: collocation_points.to_owned(),
294        kernel_nullspace_transform: Some(z),
295        polynomial_block_cols: poly_cols,
296    })
297}
298
299fn polynomial_derivative_block(
300    points: ArrayView2<'_, f64>,
301    order: DuchonNullspaceOrder,
302    derivative_order: usize,
303) -> Array2<f64> {
304    let n = points.nrows();
305    let d = points.ncols();
306    let degree = match order {
307        DuchonNullspaceOrder::Zero => 0,
308        DuchonNullspaceOrder::Linear => 1,
309        DuchonNullspaceOrder::Degree(degree) => degree,
310    };
311    let exponents = monomial_exponents(d, degree);
312    match derivative_order {
313        1 => {
314            let mut block = Array2::<f64>::zeros((n * d, exponents.len()));
315            for row in 0..n {
316                for axis in 0..d {
317                    let out_row = row * d + axis;
318                    for (col, exps) in exponents.iter().enumerate() {
319                        block[[out_row, col]] = monomial_derivative_value(points, row, exps, axis);
320                    }
321                }
322            }
323            block
324        }
325        2 => {
326            let mut block = Array2::<f64>::zeros((n * d * d, exponents.len()));
327            for row in 0..n {
328                for axis_a in 0..d {
329                    for axis_b in 0..d {
330                        let out_row = (row * d + axis_a) * d + axis_b;
331                        for (col, exps) in exponents.iter().enumerate() {
332                            block[[out_row, col]] =
333                                monomial_second_derivative_value(points, row, exps, axis_a, axis_b);
334                        }
335                    }
336                }
337            }
338            block
339        }
340        _ => Array2::<f64>::zeros((0, exponents.len())),
341    }
342}
343
344fn monomial_derivative_value(
345    points: ArrayView2<'_, f64>,
346    row: usize,
347    exponents: &[usize],
348    axis: usize,
349) -> f64 {
350    let exponent = exponents[axis];
351    if exponent == 0 {
352        return 0.0;
353    }
354    let mut value = exponent as f64;
355    for a in 0..points.ncols() {
356        let power = exponents[a] - usize::from(a == axis);
357        if power != 0 {
358            value *= points[[row, a]].powi(power as i32);
359        }
360    }
361    value
362}
363
364fn monomial_second_derivative_value(
365    points: ArrayView2<'_, f64>,
366    row: usize,
367    exponents: &[usize],
368    axis_a: usize,
369    axis_b: usize,
370) -> f64 {
371    let coeff = if axis_a == axis_b {
372        let exponent = exponents[axis_a];
373        if exponent < 2 {
374            return 0.0;
375        }
376        (exponent * (exponent - 1)) as f64
377    } else {
378        let exponent_a = exponents[axis_a];
379        let exponent_b = exponents[axis_b];
380        if exponent_a == 0 || exponent_b == 0 {
381            return 0.0;
382        }
383        (exponent_a * exponent_b) as f64
384    };
385    let mut value = coeff;
386    for axis in 0..points.ncols() {
387        let consumed = usize::from(axis == axis_a) + usize::from(axis == axis_b);
388        let power = exponents[axis] - consumed;
389        if power != 0 {
390            value *= points[[row, axis]].powi(power as i32);
391        }
392    }
393    value
394}
395
396#[inline(always)]
397pub(crate) fn bessel_k0_stable(x: f64) -> f64 {
398    let x_pos = x.max(1e-300);
399    if x_pos <= 2.0 {
400        return bessel_k0_small_series(x_pos);
401    }
402    let y = 2.0 / x_pos;
403    (-x_pos).exp() / x_pos.sqrt()
404        * (1.253_314_14
405            + y * (-0.078_323_58
406                + y * (0.021_895_68
407                    + y * (-0.010_624_46
408                        + y * (0.005_878_72 + y * (-0.002_515_40 + y * 0.000_532_08))))))
409}
410
411#[inline(always)]
412pub(crate) fn bessel_k1_stable(x: f64) -> f64 {
413    let x_pos = x.max(1e-300);
414    if x_pos <= 2.0 {
415        return bessel_k1_small_series(x_pos);
416    }
417    let y = 2.0 / x_pos;
418    (-x_pos).exp() / x_pos.sqrt()
419        * (1.253_314_14
420            + y * (0.234_986_19
421                + y * (-0.036_556_20
422                    + y * (0.015_042_68
423                        + y * (-0.007_803_53 + y * (0.003_256_14 + y * -0.000_682_45))))))
424}
425
426#[inline(always)]
427pub(crate) fn bessel_k0_k1_small_series(x: f64) -> (f64, f64) {
428    const EULER_GAMMA: f64 = 0.577_215_664_901_532_9;
429    let y = 0.25 * x * x;
430    let log_half_plus_gamma = 0.5 * y.ln() + EULER_GAMMA;
431    let mut i0 = 1.0;
432    let mut i1 = 0.5 * x;
433    let mut harmonic = 0.0;
434    let mut y_power_over_fact_sq = 1.0;
435    let mut k0_series = 0.0;
436    let mut k0_series_y_derivative_times_y = 0.0;
437    for k in 1..=256 {
438        let kf = k as f64;
439        harmonic += 1.0 / kf;
440        y_power_over_fact_sq *= y / (kf * kf);
441        let k0_term = harmonic * y_power_over_fact_sq;
442        k0_series += k0_term;
443        k0_series_y_derivative_times_y += kf * k0_term;
444        i0 += y_power_over_fact_sq;
445        i1 += 0.5 * x * y_power_over_fact_sq / (kf + 1.0);
446        if k0_term.abs() <= f64::EPSILON * i0.abs().max(k0_series.abs()).max(1.0) {
447            break;
448        }
449    }
450
451    let k0 = -log_half_plus_gamma * i0 + k0_series;
452    let k1 = i0 / x + log_half_plus_gamma * i1 - (2.0 / x) * k0_series_y_derivative_times_y;
453    (k0, k1)
454}
455
456#[inline(always)]
457pub(crate) fn bessel_k0_small_series(x: f64) -> f64 {
458    bessel_k0_k1_small_series(x).0
459}
460
461#[inline(always)]
462pub(crate) fn bessel_k1_small_series(x: f64) -> f64 {
463    bessel_k0_k1_small_series(x).1
464}
465
466pub(crate) const DUCHON_DERIVATIVE_R_FLOOR_REL: f64 = 1e-5;
467
468pub(crate) const DUCHON_COLLISION_TAYLOR_REL: f64 = 1e-4;
469
470/// Minimum `(row, center)` pair count before a radial design sweep builds a
471/// certified [`radial_profile::RadialProfile`] instead of evaluating every
472/// pair exactly. The profile build costs a few hundred exact jet
473/// evaluations, so it only pays for itself when the sweep reuses it well
474/// beyond that; below the threshold the exact path keeps small fits
475/// bit-identical to the pre-profile behavior.
476pub(crate) const RADIAL_PROFILE_MIN_PAIRS: usize = 16_384;
477
478#[inline(always)]
479pub(crate) fn duchon_p_from_nullspace_order(order: DuchonNullspaceOrder) -> usize {
480    match order {
481        // Duchon null spaces contain all polynomials of degree < m.
482        // The public `order` knob chooses that polynomial degree cutoff:
483        //   order=0 -> constants only  -> m=1
484        //   order=1 -> constants+linear -> m=2
485        DuchonNullspaceOrder::Zero => 1,
486        DuchonNullspaceOrder::Linear => 2,
487        DuchonNullspaceOrder::Degree(degree) => degree + 1,
488    }
489}
490
491/// Returns the effective Duchon null-space order, auto-degrading when the
492/// requested order leaves no radial kernel degrees of freedom.
493///
494/// The constrained kernel block has `centers.nrows() - rank(P)` columns, where
495/// `P` is the polynomial null-space block. A valid polynomial block with
496/// exactly as many centers as columns is still useless for smoothing: every
497/// center is consumed by the side constraints and the design collapses to the
498/// polynomial tail. Degrade to the highest lower null-space order with at
499/// least one constrained kernel column.
500pub(crate) fn duchon_effective_nullspace_order(
501    centers: ArrayView2<'_, f64>,
502    order: DuchonNullspaceOrder,
503) -> DuchonNullspaceOrder {
504    if order == DuchonNullspaceOrder::Zero {
505        return order;
506    }
507    let mut effective = order;
508    while effective != DuchonNullspaceOrder::Zero
509        && centers.nrows() <= polynomial_block_from_order(centers, effective).ncols()
510    {
511        effective = duchon_previous_nullspace_order(effective);
512    }
513    if effective != order {
514        // Dedup: warn only once per (rows, cols, requested_order) per process.
515        // BFGS × P-IRLS × derivative callsites hit this path many times.
516        static SEEN: std::sync::OnceLock<
517            std::sync::Mutex<std::collections::HashSet<(usize, usize, DuchonNullspaceOrder)>>,
518        > = std::sync::OnceLock::new();
519        let seen = SEEN.get_or_init(|| std::sync::Mutex::new(std::collections::HashSet::new()));
520        let key = (centers.nrows(), centers.ncols(), order);
521        let fresh = seen.lock().map(|mut s| s.insert(key)).unwrap_or(true);
522        if fresh {
523            let requested_cols = polynomial_block_from_order(centers, order).ncols();
524            let effective_cols = polynomial_block_from_order(centers, effective).ncols();
525            log::warn!(
526                "Duchon nullspace order={:?} in dim={} with {} centers leaves no radial kernel columns (polynomial_cols={}); degrading to {:?} (polynomial_cols={})",
527                order,
528                centers.ncols(),
529                centers.nrows(),
530                requested_cols,
531                effective,
532                effective_cols
533            );
534        }
535    }
536    effective
537}
538
539#[inline(always)]
540pub(crate) fn gamma_lanczos(x: f64) -> f64 {
541    // Numerical Recipes / Lanczos approximation with reflection formula.
542    const G: f64 = 7.0;
543    const P: [f64; 9] = [
544        0.999_999_999_999_809_9,
545        676.520_368_121_885_1,
546        -1_259.139_216_722_402_8,
547        771.323_428_777_653_1,
548        -176.615_029_162_140_6,
549        12.507_343_278_686_905,
550        -0.138_571_095_265_720_12,
551        9.984_369_578_019_571e-6,
552        1.505_632_735_149_311_6e-7,
553    ];
554    if x < 0.5 {
555        let pix = std::f64::consts::PI * x;
556        return std::f64::consts::PI / (pix.sin() * gamma_lanczos(1.0 - x));
557    }
558    let z = x - 1.0;
559    let mut a = P[0];
560    for (i, coeff) in P.iter().enumerate().skip(1) {
561        a += coeff / (z + i as f64);
562    }
563    let t = z + G + 0.5;
564    (2.0 * std::f64::consts::PI).sqrt() * t.powf(z + 0.5) * (-t).exp() * a
565}
566
567#[inline(always)]
568pub(crate) fn bessel_k_integer_order(n: usize, z: f64) -> f64 {
569    let zz = z.max(1e-300);
570    if n == 0 {
571        return bessel_k0_stable(zz);
572    }
573    if n == 1 {
574        return bessel_k1_stable(zz);
575    }
576    let mut km1 = bessel_k0_stable(zz);
577    let mut k = bessel_k1_stable(zz);
578    for m in 1..n {
579        let kp1 = km1 + 2.0 * (m as f64) * k / zz;
580        km1 = k;
581        k = kp1;
582    }
583    k
584}
585
586#[inline(always)]
587pub(crate) fn bessel_k_half_integer_order(l: usize, z: f64) -> f64 {
588    // Exact closed-form seeds and the stable upward recurrence
589    //   K_{1/2}(z) = sqrt(π/(2z))·e^{−z},
590    //   K_{3/2}(z) = K_{1/2}(z)·(1 + 1/z),
591    //   K_{ν+1}(z) = K_{ν−1}(z) + (2ν/z)·K_ν(z)   (ν = 1/2 + m, m ≥ 1).
592    // Equivalent to the closed-form polynomial sum, but uses EXACT integer
593    // coefficients via the recurrence instead of approximate Lanczos-gamma
594    // values for `c_j = (l+j)!/(j!(l−j)!)`. The Lanczos approximation is
595    // accurate to ~1 ULP at integer arguments; that error gets amplified
596    // through catastrophic cancellation in derivative lattices of the
597    // r^μ·K_μ(κr) family. Matching the [`BesselKLadder`] arithmetic byte-
598    // for-byte also ensures the ladder/per-call paths agree exactly.
599    let zz = z.max(1e-300);
600    let k_half = (std::f64::consts::PI / (2.0 * zz)).sqrt() * (-zz).exp();
601    if l == 0 {
602        return k_half;
603    }
604    let mut km1 = k_half;
605    let mut k = k_half * (1.0 + 1.0 / zz);
606    for m in 1..l {
607        let nu = m as f64 + 0.5;
608        let kp1 = km1 + 2.0 * nu * k / zz;
609        km1 = k;
610        k = kp1;
611    }
612    k
613}
614
615#[inline(always)]
616pub(crate) fn bessel_k_real_half_integer_or_integer(
617    nu_abs: f64,
618    z: f64,
619) -> Result<f64, BasisError> {
620    let two_nu = (2.0 * nu_abs).round();
621    if (two_nu - 2.0 * nu_abs).abs() > 1e-12 {
622        crate::bail_invalid_basis!(
623            "unsupported Bessel-K order ν={nu_abs}; only integer/half-integer orders are supported"
624        );
625    }
626    let two_nu_i = two_nu as i64;
627    if two_nu_i % 2 == 0 {
628        let n = (two_nu_i / 2).max(0) as usize;
629        Ok(bessel_k_integer_order(n, z))
630    } else {
631        let l = ((two_nu_i - 1) / 2).max(0) as usize;
632        Ok(bessel_k_half_integer_order(l, z))
633    }
634}
635
636/// Precomputed coefficient for `polyharmonic_kernel` that depends only on
637/// `m` and `k_dim`, not on `r`.  Avoids repeated gamma_lanczos calls in the
638/// hot kernel evaluation loop (called n × k times per basis build).
639#[derive(Clone, Copy)]
640pub(crate) struct PolyharmonicBlockCoeff {
641    pub(crate) c: f64,
642    pub(crate) power: f64,
643    pub(crate) is_log_case: bool,
644}
645
646impl PolyharmonicBlockCoeff {
647    pub(crate) fn new(m: f64, k_dim: usize) -> Self {
648        assert!(
649            m.is_finite() && m > 0.0,
650            "PolyharmonicBlockCoeff::new: m must be finite and > 0, got {m}"
651        );
652        let k_half = 0.5 * k_dim as f64;
653        let power = 2.0 * m - k_dim as f64;
654        // Log case: k_dim is even and `2m − k_dim` is a non-negative even
655        // integer (within ε). For fractional `m` this never fires; for
656        // integer `m` it matches the original integer modulo check exactly.
657        const LOG_EPS: f64 = 1e-12;
658        let two_m = 2.0 * m;
659        let is_log_case = k_dim.is_multiple_of(2) && {
660            let n_f = (power / 2.0).round();
661            n_f >= 0.0 && (n_f * 2.0 - power).abs() < LOG_EPS
662        };
663        if is_log_case {
664            let m_int = m.round() as i64;
665            let m_minus_half_d_plus_one = (m - k_half + 1.0).round() as i64;
666            let c = polyharmonic_log_sign(m_int as usize, k_dim)
667                / (2.0_f64.powi((two_m.round() as i32) - 1)
668                    * std::f64::consts::PI.powf(k_half)
669                    * gamma_lanczos(m)
670                    * gamma_lanczos(m_minus_half_d_plus_one as f64));
671            Self {
672                c,
673                power,
674                is_log_case: true,
675            }
676        } else {
677            let c = gamma_lanczos(k_half - m)
678                / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m));
679            Self {
680                c,
681                power,
682                is_log_case: false,
683            }
684        }
685    }
686
687    #[inline(always)]
688    pub(crate) fn eval(&self, r: f64) -> f64 {
689        if r <= 0.0 {
690            return self.origin_limit();
691        }
692        if self.is_log_case {
693            self.c * r.powf(self.power) * r.max(1e-300).ln()
694        } else {
695            self.c * r.powf(self.power)
696        }
697    }
698
699    #[inline(always)]
700    pub(crate) fn origin_limit(&self) -> f64 {
701        if self.is_log_case {
702            log_power_origin_limit(self.c, self.power, 1.0, 0.0)
703        } else {
704            log_power_origin_limit(self.c, self.power, 0.0, 1.0)
705        }
706    }
707}
708
709pub(crate) fn polyharmonic_kernel(r: f64, m: f64, k_dim: usize) -> f64 {
710    PolyharmonicBlockCoeff::new(m, k_dim).eval(r)
711}
712
713#[inline(always)]
714pub(crate) fn signed_infinity(sign: f64) -> f64 {
715    if sign.is_sign_negative() {
716        f64::NEG_INFINITY
717    } else {
718        f64::INFINITY
719    }
720}
721
722#[inline(always)]
723pub(crate) fn log_power_origin_limit(
724    coeff: f64,
725    exponent: f64,
726    log_coeff: f64,
727    pure_coeff: f64,
728) -> f64 {
729    if log_coeff == 0.0 && pure_coeff == 0.0 {
730        return 0.0;
731    }
732    if exponent > 0.0 {
733        return 0.0;
734    }
735    if exponent == 0.0 {
736        if log_coeff != 0.0 {
737            signed_infinity(-coeff * log_coeff)
738        } else {
739            coeff * pure_coeff
740        }
741    } else if log_coeff != 0.0 {
742        signed_infinity(-coeff * log_coeff)
743    } else {
744        signed_infinity(coeff * pure_coeff)
745    }
746}
747
748#[inline(always)]
749pub(crate) fn polyharmonic_log_sign(m: usize, k_dim: usize) -> f64 {
750    assert!(
751        k_dim.is_multiple_of(2),
752        "polyharmonic_log_sign requires even kernel dimension: k_dim={k_dim}, m={m}"
753    );
754    (-1.0_f64).powi(m as i32 - (k_dim as i32 / 2) + 1)
755}
756
757#[inline(always)]
758pub(crate) fn duchon_matern_block(
759    r: f64,
760    kappa: f64,
761    n_order: usize,
762    k_dim: usize,
763) -> Result<f64, BasisError> {
764    let n = n_order as f64;
765    let k_half = 0.5 * k_dim as f64;
766    let nu = n - k_half;
767    let nu_abs = nu.abs();
768    let c = kappa.powf(k_half - n)
769        / ((2.0 * std::f64::consts::PI).powf(k_half) * 2.0_f64.powf(n - 1.0) * gamma_lanczos(n));
770    if r <= 0.0 {
771        if nu > 0.0 {
772            // r^ν K_ν(κr) → 2^(ν−1) Γ(ν) κ^(−ν) as r→0+.
773            return Ok(c * 2.0_f64.powf(nu - 1.0) * gamma_lanczos(nu) * kappa.powf(-nu));
774        }
775        // ν ≤ 0: c·r^ν·K_|ν|(κr) is divergent at r=0 (logarithmically for ν=0,
776        // power-law for ν<0). The hybrid-kernel diagonal must be evaluated via
777        // duchon_hybrid_kernel_collision_value, which sums the divergent
778        // Matérn and polyharmonic blocks so the singularities cancel exactly
779        // (guaranteed by the PFD identity when 2(p+s) > d).
780        crate::bail_invalid_basis!(
781            "Duchon Matérn block at r=0 with ν={nu} ≤ 0 is divergent; \
782             evaluate the hybrid kernel diagonal via the collision routine"
783        );
784    }
785    let z = (kappa * r).max(1e-300);
786    let k_nu = bessel_k_real_half_integer_or_integer(nu_abs, z)?;
787    Ok(c * r.powf(nu) * k_nu)
788}
789
790#[inline(always)]
791pub(crate) fn polyharmonic_kernel_triplet(
792    r: f64,
793    m: f64,
794    k_dim: usize,
795) -> Result<(f64, f64, f64), BasisError> {
796    let (value, first, second, _, _) = polyharmonic_block_jet4(r, m, k_dim)?;
797    Ok((value, first, second))
798}
799
800#[inline(always)]
801pub(crate) fn falling_factorial(alpha: f64, order: usize) -> f64 {
802    (0..order).fold(1.0, |acc, idx| acc * (alpha - idx as f64))
803}
804
805#[inline(always)]
806pub(crate) fn falling_factorial_derivative(alpha: f64, order: usize) -> f64 {
807    if order == 0 {
808        return 0.0;
809    }
810    let mut total = 0.0;
811    for omit in 0..order {
812        let mut term = 1.0;
813        for idx in 0..order {
814            if idx != omit {
815                term *= alpha - idx as f64;
816            }
817        }
818        total += term;
819    }
820    total
821}
822
823/// Unified radial jet for one polyharmonic partial-fraction block.
824///
825/// Returns (φ, φ', φ'', φ''', φ'''') from a single consistent evaluation,
826/// sharing normalization constant, r_safe, and log_r. This eliminates the
827/// possibility of numerical drift between the triplet and higher-order
828/// derivative paths.
829pub(crate) fn polyharmonic_block_jet4(
830    r: f64,
831    m: f64,
832    k_dim: usize,
833) -> Result<(f64, f64, f64, f64, f64), BasisError> {
834    if !r.is_finite() || r < 0.0 {
835        crate::bail_invalid_basis!("polyharmonic distance must be finite and non-negative");
836    }
837    assert!(
838        m.is_finite() && m > 0.0,
839        "polyharmonic_block_jet4: m must be finite and > 0, got {m}"
840    );
841
842    let k_half = 0.5 * k_dim as f64;
843    let alpha = 2.0 * m - k_dim as f64;
844    // Log case: k_dim even and `2m − k_dim` is a non-negative even integer
845    // (within ε). For fractional `m` this never fires.
846    const LOG_EPS: f64 = 1e-12;
847    let is_log_case = k_dim.is_multiple_of(2) && {
848        let n_f = (alpha / 2.0).round();
849        n_f >= 0.0 && (n_f * 2.0 - alpha).abs() < LOG_EPS
850    };
851    if is_log_case {
852        let m_int = m.round() as usize;
853        let c = polyharmonic_log_sign(m_int, k_dim)
854            / (2.0_f64.powi((2 * m_int - 1) as i32)
855                * std::f64::consts::PI.powf(k_half)
856                * gamma_lanczos(m)
857                * gamma_lanczos((m_int - k_dim / 2 + 1) as f64));
858        let mut out = [0.0; 5];
859        for d in 0..5 {
860            let e = alpha - d as f64;
861            let ff = falling_factorial(alpha, d);
862            let ff_d = falling_factorial_derivative(alpha, d);
863            out[d] = if r <= 0.0 {
864                log_power_origin_limit(c, e, ff, ff_d)
865            } else {
866                c * r.powf(e) * (ff * r.ln() + ff_d)
867            };
868        }
869        return Ok((out[0], out[1], out[2], out[3], out[4]));
870    }
871
872    let c = gamma_lanczos(k_half - m)
873        / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m));
874    let mut out = [0.0; 5];
875    for d in 0..5 {
876        let e = alpha - d as f64;
877        let ff = falling_factorial(alpha, d);
878        out[d] = if r <= 0.0 {
879            log_power_origin_limit(c, e, 0.0, ff)
880        } else {
881            c * ff * r.powf(e)
882        };
883    }
884    Ok((out[0], out[1], out[2], out[3], out[4]))
885}
886
887#[inline(always)]
888pub(crate) fn log_power_family_derivative(
889    exponent: f64,
890    log_coeff: f64,
891    pure_coeff: f64,
892) -> (f64, f64, f64) {
893    (
894        exponent - 1.0,
895        exponent * log_coeff,
896        exponent * pure_coeff + log_coeff,
897    )
898}
899
900#[inline(always)]
901pub(crate) fn log_power_family_value(
902    r: f64,
903    coeff: f64,
904    exponent: f64,
905    log_coeff: f64,
906    pure_coeff: f64,
907) -> f64 {
908    if r <= 0.0 {
909        log_power_origin_limit(coeff, exponent, log_coeff, pure_coeff)
910    } else {
911        coeff * r.powf(exponent) * (log_coeff * r.ln() + pure_coeff)
912    }
913}
914
915#[inline(always)]
916pub(crate) fn duchon_polyharmonic_operator_block_jets(
917    r: f64,
918    m: f64,
919    k_dim: usize,
920) -> Result<(f64, f64, f64, f64), BasisError> {
921    if !r.is_finite() || r < 0.0 {
922        crate::bail_invalid_basis!("polyharmonic distance must be finite and non-negative");
923    }
924    assert!(
925        m.is_finite() && m > 0.0,
926        "duchon_polyharmonic_operator_block_jets: m must be finite and > 0, got {m}"
927    );
928
929    let k_half = 0.5 * k_dim as f64;
930    let alpha = 2.0 * m - k_dim as f64;
931    // Log case: k_dim even and `2m − k_dim` is a non-negative even integer
932    // (within ε). For fractional `m` this never fires; for integer `m` it
933    // matches the original `k_dim % 2 == 0 && m >= k_dim / 2` check.
934    const LOG_EPS: f64 = 1e-12;
935    let is_log_case = k_dim.is_multiple_of(2) && {
936        let n_f = (alpha / 2.0).round();
937        n_f >= 0.0 && (n_f * 2.0 - alpha).abs() < LOG_EPS
938    };
939    let (c, phi_log_coeff, phi_pure_coeff) = if is_log_case {
940        let m_int = m.round() as usize;
941        (
942            polyharmonic_log_sign(m_int, k_dim)
943                / (2.0_f64.powi((2 * m_int - 1) as i32)
944                    * std::f64::consts::PI.powf(k_half)
945                    * gamma_lanczos(m)
946                    * gamma_lanczos((m_int - k_dim / 2 + 1) as f64)),
947            1.0,
948            0.0,
949        )
950    } else {
951        (
952            gamma_lanczos(k_half - m)
953                / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m)),
954            0.0,
955            1.0,
956        )
957    };
958
959    let (phi_r_exp, phi_r_log, phi_r_pure) =
960        log_power_family_derivative(alpha, phi_log_coeff, phi_pure_coeff);
961    let q_exp = phi_r_exp - 1.0;
962    let q = log_power_family_value(r, c, q_exp, phi_r_log, phi_r_pure);
963
964    let (q_r_exp_raw, q_r_log, q_r_pure) =
965        log_power_family_derivative(q_exp, phi_r_log, phi_r_pure);
966    let t_exp = q_r_exp_raw - 1.0;
967    let t = log_power_family_value(r, c, t_exp, q_r_log, q_r_pure);
968
969    let (t_r_exp, t_r_log, t_r_pure) = log_power_family_derivative(t_exp, q_r_log, q_r_pure);
970    let t_r = log_power_family_value(r, c, t_r_exp, t_r_log, t_r_pure);
971
972    let (t_rr_exp, t_rr_log, t_rr_pure) = log_power_family_derivative(t_r_exp, t_r_log, t_r_pure);
973    let t_rr = log_power_family_value(r, c, t_rr_exp, t_rr_log, t_rr_pure);
974
975    Ok((q, t, t_r, t_rr))
976}
977
978/// Shared Bessel-K ladder for one evaluation point `z = κ·r`.
979///
980/// Every Matérn partial-fraction block and every term of its radial
981/// derivative lattice consumes `K_ν(z)` at orders from ONE parity class
982/// (integer when the covariate dimension is even, half-integer when odd),
983/// differing by integers — and all at the SAME `z`. The previous code
984/// restarted the `K₀/K₁` (or closed-form half-integer) seed evaluation and
985/// the upward recurrence inside every per-term Bessel call: hundreds of
986/// redundant seed+recurrence runs per `(row, center)` pair, which the #979
987/// CTN stage-1 stack profile showed to be the dominant cost of every Duchon
988/// κ-trial at scale. One ladder per point replaces all of them: two seed
989/// evaluations plus the standard upward recurrence
990/// `K_{ν+1}(z) = K_{ν−1}(z) + (2ν/z)·K_ν(z)`, which is the numerically
991/// STABLE direction for `K` (it grows with ν). For integer orders this is
992/// arithmetic-identical to the old per-call `bessel_k_integer_order`, which
993/// ran the same seeds and recurrence internally; for half-integer orders the
994/// recurrence is exact and replaces the per-order closed-form sum.
995pub(crate) struct BesselKLadder {
996    /// `values[i] = K_{base + i}(z)` with `base ∈ {0, ½}`.
997    pub(crate) values: SmallVec<[f64; 16]>,
998    pub(crate) half_integer: bool,
999}
1000
1001impl BesselKLadder {
1002    pub(crate) fn build(z: f64, half_integer: bool, max_order_steps: usize) -> Self {
1003        let zz = z.max(1e-300);
1004        let mut values: SmallVec<[f64; 16]> = SmallVec::with_capacity(max_order_steps + 2);
1005        if half_integer {
1006            // K_{1/2}(z) = √(π/(2z))·e^{−z};  K_{3/2}(z) = K_{1/2}(z)·(1 + 1/z).
1007            let k_half = (std::f64::consts::PI / (2.0 * zz)).sqrt() * (-zz).exp();
1008            values.push(k_half);
1009            values.push(k_half * (1.0 + 1.0 / zz));
1010        } else {
1011            values.push(bessel_k0_stable(zz));
1012            values.push(bessel_k1_stable(zz));
1013        }
1014        let base = if half_integer { 0.5 } else { 0.0 };
1015        for i in 1..max_order_steps {
1016            let nu = base + i as f64;
1017            let next = values[i - 1] + 2.0 * nu * values[i] / zz;
1018            values.push(next);
1019        }
1020        Self {
1021            values,
1022            half_integer,
1023        }
1024    }
1025
1026    /// `K_{|order|}(z)` from the ladder (`K_{−ν} = K_ν`).
1027    #[inline]
1028    pub(crate) fn k_abs(&self, order_abs: f64) -> f64 {
1029        let base = if self.half_integer { 0.5 } else { 0.0 };
1030        let idx = (order_abs - base).round() as usize;
1031        self.values[idx]
1032    }
1033}
1034
1035/// Radial-derivative jets of the Matérn family `coeff·r^μ·K_μ(κr)` up to
1036/// order `max_j ≤ 4`, evaluated against a shared [`BesselKLadder`].
1037///
1038/// Exact recurrence derived from `d/dr[r^ν K_ν(κr)]` and the Bessel identity
1039/// `dK_ν/dz = −K_{ν−1}(z) − (ν/z)K_ν(z)`:
1040///
1041///   g⁽⁰⁾ = c · r^ν · K_ν(z)
1042///   g⁽¹⁾ = −c · κ · r^ν · K_{ν−1}(z)
1043///   g⁽²⁾ = c·κ² r^ν K_{ν−2} − c·κ r^{ν−1} K_{ν−1}, ...
1044///
1045/// Same derivative lattice as the per-order reference implementation
1046/// `duchon_matern_family_radial_derivative_reference` (kept in the test
1047/// module as the equivalence oracle)
1048/// (term-for-term, in the same order), but: (a) the lattice is expanded
1049/// incrementally once instead of rebuilt from scratch per derivative order,
1050/// (b) terms live in a fixed-capacity stack buffer instead of per-call heap
1051/// `Vec`s (≤ 2^max_j ≤ 16 terms), and (c) every Bessel factor is an indexed
1052/// ladder read instead of a fresh seed+recurrence evaluation. Only orders
1053/// `0..=max_j` are computed — the q-family consumes order 0 only and the
1054/// t-family orders ≤ 2, where the old path always expanded to order 4 and
1055/// discarded the tail.
1056pub(crate) fn duchon_matern_family_jets_with_ladder(
1057    r: f64,
1058    kappa: f64,
1059    coeff: f64,
1060    mu: f64,
1061    max_j: usize,
1062    ladder: &BesselKLadder,
1063    out: &mut [f64],
1064) -> Result<(), BasisError> {
1065    if max_j > 4 || out.len() <= max_j {
1066        crate::bail_invalid_basis!(
1067            "Duchon Matérn-family ladder jets support derivative orders 0..=4 with an output slot per order"
1068        );
1069    }
1070    if r <= 0.0 {
1071        out[..=max_j].fill(0.0);
1072        if mu > 0.0 {
1073            out[0] = coeff * 2.0_f64.powf(mu - 1.0) * gamma_lanczos(mu) * kappa.powf(-mu);
1074        }
1075        return Ok(());
1076    }
1077    let mut terms: SmallVec<[DuchonMaternDerivativeTerm; 16]> =
1078        smallvec![DuchonMaternDerivativeTerm {
1079            coeff,
1080            kappa_power: 0,
1081            r_power: mu,
1082            bessel_order: mu,
1083        }];
1084    for (j, slot) in out.iter_mut().enumerate().take(max_j + 1) {
1085        if j > 0 {
1086            let mut next: SmallVec<[DuchonMaternDerivativeTerm; 16]> =
1087                SmallVec::with_capacity(terms.len() * 2);
1088            for term in &terms {
1089                let stay_coeff = term.coeff * (term.r_power - term.bessel_order);
1090                if stay_coeff != 0.0 {
1091                    next.push(DuchonMaternDerivativeTerm {
1092                        coeff: stay_coeff,
1093                        kappa_power: term.kappa_power,
1094                        r_power: term.r_power - 1.0,
1095                        bessel_order: term.bessel_order,
1096                    });
1097                }
1098                next.push(DuchonMaternDerivativeTerm {
1099                    coeff: -term.coeff,
1100                    kappa_power: term.kappa_power + 1,
1101                    r_power: term.r_power,
1102                    bessel_order: term.bessel_order - 1.0,
1103                });
1104            }
1105            terms = next;
1106        }
1107        let mut value = KahanSum::default();
1108        for term in &terms {
1109            if term.coeff == 0.0 {
1110                continue;
1111            }
1112            value.add(
1113                term.coeff
1114                    * kappa.powi(term.kappa_power as i32)
1115                    * r.powf(term.r_power)
1116                    * ladder.k_abs(term.bessel_order.abs()),
1117            );
1118        }
1119        *slot = value.sum();
1120    }
1121    Ok(())
1122}
1123
1124/// Maximum ladder steps (`K_base ..= K_{base+steps}`) needed by the q/t
1125/// operator families of Matérn block `n` in dimension `k_dim`: the q-family
1126/// reads `K_{|ν−1|}` and the t-family `K_{|ν−2−j|}` for `j ≤ 2`, ν = n − d/2.
1127pub(crate) fn duchon_matern_block_max_ladder_steps(n_order: usize, k_dim: usize) -> usize {
1128    let nu = n_order as f64 - 0.5 * k_dim as f64;
1129    let candidates = [
1130        (nu - 1.0).abs(),
1131        (nu - 2.0).abs(),
1132        (nu - 3.0).abs(),
1133        (nu - 4.0).abs(),
1134    ];
1135    let max_abs = candidates.iter().copied().fold(0.0_f64, f64::max);
1136    max_abs.floor() as usize + 1
1137}
1138
1139pub(crate) fn duchon_matern_operator_block_jets_with_ladder(
1140    r: f64,
1141    kappa: f64,
1142    n_order: usize,
1143    k_dim: usize,
1144    ladder: &BesselKLadder,
1145) -> Result<(f64, f64, f64, f64), BasisError> {
1146    if r <= 0.0 {
1147        return Ok((0.0, 0.0, 0.0, 0.0));
1148    }
1149    let n = n_order as f64;
1150    let k_half = 0.5 * k_dim as f64;
1151    let nu = n - k_half;
1152    let c = kappa.powf(k_half - n)
1153        / ((2.0 * std::f64::consts::PI).powf(k_half) * 2.0_f64.powf(n - 1.0) * gamma_lanczos(n));
1154
1155    let mut q_out = [0.0_f64; 1];
1156    duchon_matern_family_jets_with_ladder(r, kappa, -c * kappa, nu - 1.0, 0, ladder, &mut q_out)?;
1157    let mut t_out = [0.0_f64; 3];
1158    duchon_matern_family_jets_with_ladder(
1159        r,
1160        kappa,
1161        c * kappa * kappa,
1162        nu - 2.0,
1163        2,
1164        ladder,
1165        &mut t_out,
1166    )?;
1167    Ok((q_out[0], t_out[0], t_out[1], t_out[2]))
1168}
1169
1170#[inline(always)]
1171pub(crate) fn pure_duchon_block_order(p_order: usize, s_order: f64) -> f64 {
1172    p_order as f64 + s_order
1173}
1174
1175pub(crate) fn validate_duchon_kernel_orders(
1176    length_scale: Option<f64>,
1177    p_order: usize,
1178    s_order: f64,
1179    k_dim: usize,
1180) -> Result<(), BasisError> {
1181    if k_dim == 0 {
1182        crate::bail_invalid_basis!("Duchon basis requires at least one covariate dimension");
1183    }
1184    if let Some(scale) = length_scale
1185        && (!scale.is_finite() || scale <= 0.0)
1186    {
1187        crate::bail_invalid_basis!("Duchon hybrid length_scale must be finite and positive");
1188    }
1189    // Two independent well-posedness conditions on (p, s, d) for pure Duchon.
1190    //
1191    // (1) CPD-vs-nullspace adequacy — gated below on `length_scale.is_none()`.
1192    //     The pure-polyharmonic kernel of effective order m = p+s in R^d is
1193    //     phi(r) = r^{2m-d}, or r^{2m-d}·log r when 2m-d is a non-negative
1194    //     even integer (the "log case", reached precisely when d is even
1195    //     and m >= d/2). Wendland's Theorem 8.17 / 8.18 give its
1196    //     conditional-positive-definiteness order:
1197    //
1198    //         d odd,  exponent half-integer:  ceil((2m-d)/2) = m - (d-1)/2
1199    //         d even, log case:               (2m-d)/2 + 1   = m - d/2 + 1
1200    //
1201    //     Duchon interpolation with polynomial nullspace P_p (polynomials
1202    //     of degree < p) is uniquely solvable iff the kernel's CPD order
1203    //     does not exceed p. Substituting m = p + s:
1204    //
1205    //         d odd:  s <= (d-1)/2     <=>  2s <= d - 1
1206    //         d even: s <= d/2 - 1     <=>  2s <= d - 2
1207    //
1208    //     Both branches collapse to `2s < d` once we use that s and d are
1209    //     integers and 2s is therefore even (so `2s = d - 1` is impossible
1210    //     for even d, and `2s <= d - 2` is just `2s < d`).
1211    //
1212    //     Counter-example admitted if this guard is dropped: d=2, p=1, s=1
1213    //     passes the spectral check (2(1+1)=4 > 2) and builds the TPS
1214    //     kernel c·r²·log r against a constants-only nullspace P_1; the
1215    //     interpolation form is not PD on lambda perp P_1 and the fitted
1216    //     penalty is meaningless.
1217    //
1218    //     The hybrid (Matérn-blended) Duchon kernel sidesteps this entirely:
1219    //     the Matérn remainder is strictly positive definite (CPD order 0),
1220    //     so any P_p suffices — hence the `length_scale.is_none()` gate.
1221    //
1222    // (2) Spectral kernel-existence — universal, gated below on the sum.
1223    //     The pointwise kernel comes from the inverse Fourier of
1224    //     1/|xi|^{2(p+s)}, which is a finite distribution at the origin
1225    //     iff `2(p+s) > d`. Below that threshold the radial kernel value
1226    //     diverges and there is nothing to evaluate.
1227    if !s_order.is_finite() || s_order < 0.0 {
1228        crate::bail_invalid_basis!("Duchon spectral power must be finite and ≥ 0; got s={s_order}");
1229    }
1230    if length_scale.is_none() && p_order < 2 && 2.0 * s_order >= k_dim as f64 {
1231        crate::bail_invalid_basis!(
1232            "pure Duchon requires power < dimension/2 for nullspace degree < {p_order}; got power={s_order}, dimension={k_dim}"
1233        );
1234    }
1235    let spectral_order = 2.0 * (p_order as f64 + s_order);
1236    if spectral_order <= k_dim as f64 {
1237        crate::bail_invalid_basis!(
1238            "Duchon pointwise kernel values require 2*(p+s) > dimension; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1239        );
1240    }
1241    Ok(())
1242}
1243
1244pub(crate) fn validate_duchon_collocation_orders(
1245    length_scale: Option<f64>,
1246    p_order: usize,
1247    s_order: f64,
1248    k_dim: usize,
1249    max_operator_derivative_order: usize,
1250) -> Result<(), BasisError> {
1251    // Kernel-level conditions (existence + CPD/nullspace adequacy) come first;
1252    // the operator-level conditions below build on a pointwise-valid kernel.
1253    validate_duchon_kernel_orders(length_scale, p_order, s_order, k_dim)?;
1254    // The spectral_order > k_dim + k checks below are C^k-at-origin
1255    // conditions: for the polyharmonic kernel r^{2(p+s)-d} (or the log
1256    // variant) to admit k-th radial derivatives in the distributional sense
1257    // — and therefore for k-th-order derivative *collocation* of the
1258    // kernel against centers to produce a finite operator — we need its
1259    // exponent to clear the next k orders of differentiation at the
1260    // origin. Equivalently: 2(p+s) - d > k.
1261    //
1262    // Note these are independent of the CPD/nullspace check. The penalty
1263    // matrices ultimately built from these collocation operators are of
1264    // the form S = D_k^T D_k and are PSD by construction; the discipline
1265    // here is purely about *existence* of D_k itself.
1266    let spectral_order = 2.0 * (p_order as f64 + s_order);
1267    if max_operator_derivative_order >= 1 && spectral_order <= k_dim as f64 + 1.0 {
1268        crate::bail_invalid_basis!(
1269            "Duchon D1 collocation requires 2*(p+s) > dimension+1; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1270        );
1271    }
1272    if max_operator_derivative_order >= 2 && spectral_order <= k_dim as f64 + 2.0 {
1273        crate::bail_invalid_basis!(
1274            "Duchon D2 collocation requires 2*(p+s) > dimension+2; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1275        );
1276    }
1277    Ok(())
1278}
1279
1280#[derive(Debug, Clone)]
1281pub struct DuchonPartialFractionCoeffs {
1282    pub(crate) a: Vec<f64>,
1283    pub(crate) b: Vec<f64>,
1284}
1285
1286#[inline(always)]
1287pub(crate) fn duchon_partial_fraction_coeffs(
1288    p_order: usize,
1289    s_order: usize,
1290    kappa: f64,
1291) -> DuchonPartialFractionCoeffs {
1292    // 1/(ρ^{2p}(κ²+ρ²)^s) = Σ a_m/ρ^{2m} + Σ b_n/(κ²+ρ²)^n
1293    let mut a = vec![0.0_f64; p_order + 1]; // 1-based m
1294    let mut b = vec![0.0_f64; s_order + 1]; // 1-based n
1295    if s_order == 0 {
1296        if p_order > 0 {
1297            // Pure intrinsic polyharmonic case: no Matérn tail remains, so the
1298            // spectrum is exactly 1 / ρ^(2p).
1299            a[p_order] = 1.0;
1300        }
1301        return DuchonPartialFractionCoeffs { a, b };
1302    }
1303    for m in 1..=p_order {
1304        let sign = if (p_order - m).is_multiple_of(2) {
1305            1.0
1306        } else {
1307            -1.0
1308        };
1309        let expo = -2.0 * (s_order + p_order - m) as f64;
1310        let comb = binomial_f64(s_order + p_order - m - 1, p_order - m);
1311        a[m] = sign * kappa.powf(expo) * comb;
1312    }
1313    for n in 1..=s_order {
1314        let sign = if p_order.is_multiple_of(2) { 1.0 } else { -1.0 };
1315        let expo = -2.0 * (p_order + s_order - n) as f64;
1316        let comb = if p_order == 0 && n == s_order {
1317            // p=0 reduces to the pure Matérn block 1/(κ²+ρ²)^s.
1318            1.0
1319        } else {
1320            let top = p_order + s_order - n - 1;
1321            binomial_f64(top, s_order - n)
1322        };
1323        b[n] = sign * kappa.powf(expo) * comb;
1324    }
1325    DuchonPartialFractionCoeffs { a, b }
1326}
1327
1328/// 64-node Gauss–Legendre rule on `[0, 1]` (nodes already mapped from the
1329/// canonical `[-1, 1]` interval, weights scaled by the `1/2` Jacobian).
1330///
1331/// Used by [`duchon_hybrid_kernel_stable_integral`] to evaluate the hybrid
1332/// Duchon–Matérn kernel without the catastrophically-cancelling
1333/// partial-fraction sum (gam#1424). The integrand is smooth and strictly
1334/// positive on `(0, 1)`, so a fixed high-order rule reproduces the kernel to
1335/// ~1e-15 relative accuracy across all reachable high-dimensional orders.
1336fn gauss_legendre_01_64() -> &'static [(f64, f64)] {
1337    use std::sync::OnceLock;
1338    static NODES: OnceLock<Vec<(f64, f64)>> = OnceLock::new();
1339    NODES.get_or_init(|| {
1340        // Newton iteration on the Legendre polynomial roots (the classic
1341        // `gauleg` recipe). The N-point rule is symmetric about the midpoint, so
1342        // only the lower half of the roots is solved for and the rule is
1343        // mirrored. Computed once; converges to full f64 precision in a handful
1344        // of Newton steps per root.
1345        const N: usize = 64;
1346        let nf = N as f64;
1347        let mut nodes: Vec<(f64, f64)> = Vec::with_capacity(N);
1348        let half = N.div_ceil(2);
1349        for i in 0..half {
1350            // Initial guess for the i-th root on [-1, 1] (Chebyshev-like).
1351            let mut x = (std::f64::consts::PI * (i as f64 + 0.75) / (nf + 0.5)).cos();
1352            let mut dp = 0.0_f64;
1353            for _ in 0..100 {
1354                // Evaluate the Legendre polynomial P_N(x) and derivative P_N'(x)
1355                // via the three-term recurrence.
1356                let mut p0 = 1.0_f64;
1357                let mut p1 = x;
1358                for k in 2..=N {
1359                    let kf = k as f64;
1360                    let p2 = ((2.0 * kf - 1.0) * x * p1 - (kf - 1.0) * p0) / kf;
1361                    p0 = p1;
1362                    p1 = p2;
1363                }
1364                // P_N'(x) = N (x P_N(x) − P_{N−1}(x)) / (x² − 1).
1365                dp = nf * (x * p1 - p0) / (x * x - 1.0);
1366                let dx = p1 / dp;
1367                x -= dx;
1368                if dx.abs() <= 1e-16 * x.abs().max(1.0) {
1369                    break;
1370                }
1371            }
1372            // Gauss–Legendre weight: 2 / ((1 − x²) P_N'(x)²).
1373            let w = 2.0 / ((1.0 - x * x) * dp * dp);
1374            // x is the i-th root counting inward from +1; mirror to −x.
1375            nodes.push((x, w));
1376            if x.abs() > 1e-300 {
1377                nodes.push((-x, w));
1378            }
1379        }
1380        // Sort by node, then map [-1, 1] -> [0, 1] with the 1/2 Jacobian.
1381        nodes.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
1382        nodes
1383            .into_iter()
1384            .map(|(x, w)| (0.5 * (x + 1.0), 0.5 * w))
1385            .collect()
1386    })
1387}
1388
1389/// Evaluate the hybrid Duchon–Matérn kernel
1390/// `φ(r) = F^{-1}[ ρ^{-2p} (κ²+ρ²)^{-s} ](r)` via a single, cancellation-free
1391/// 1-D integral (gam#1424).
1392///
1393/// The partial-fraction expansion `Σ a_m/ρ^{2m} + Σ b_n/(κ²+ρ²)^n` evaluates
1394/// the radial kernel as an alternating sum of individually huge polyharmonic
1395/// (`r^{2m-d}`) and Matérn blocks whose leading singular parts cancel. For
1396/// high `d` (e.g. d=16, p=2, s=7) the largest block is ~1e3 while the true
1397/// value is ~1e-13, so f64 loses *every* significant digit and the resulting
1398/// Gram matrix is no longer PSD (λ_min ≈ −0.26 after normalization).
1399///
1400/// Using the Schwinger / Feynman parametrization of both rational factors and
1401/// performing the Gaussian (radial inverse-FT) integral analytically reduces
1402/// the kernel to
1403///
1404/// ```text
1405///   φ(r) = (4π)^{-d/2} / (Γ(p)Γ(s))
1406///          · ∫₀¹ (1-w)^{p-1} w^{s-1} · 2(B/A)^{b/2} K_b(2√(AB)) dw,
1407///   with  b = p + s − d/2,  A = w κ²,  B = r²/4.
1408/// ```
1409///
1410/// The integrand is smooth and strictly positive on `(0, 1)` (no cancellation),
1411/// so a fixed 64-point Gauss–Legendre rule is accurate to ~1e-15 relative.
1412/// The `r = 0` diagonal has the closed form
1413/// `φ(0) = (4π)^{-d/2} Γ(b)/(Γ(p)Γ(s)) κ^{-2b} B(s−b, p)`.
1414///
1415/// Requires `b = p + s − d/2 > 0` (kernel existence, `2(p+s) > d`) and
1416/// `s − b = d/2 − p > 0` (integrable `w → 0` endpoint), i.e. `2p < d`. Callers
1417/// must check [`duchon_hybrid_stable_integral_applies`] before invoking.
1418pub(crate) fn duchon_hybrid_kernel_stable_integral(
1419    r: f64,
1420    kappa: f64,
1421    p_order: usize,
1422    s_order: usize,
1423    k_dim: usize,
1424) -> Result<f64, BasisError> {
1425    assert!(
1426        duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim),
1427        "duchon_hybrid_kernel_stable_integral precondition violated: 2(p+s) > d and 2p < d required (p={p_order}, s={s_order}, d={k_dim})"
1428    );
1429    let p = p_order as f64;
1430    let s = s_order as f64;
1431    let half_d = 0.5 * k_dim as f64;
1432    let b = p + s - half_d;
1433    let pref = (4.0 * std::f64::consts::PI).powf(-half_d) / (gamma_lanczos(p) * gamma_lanczos(s));
1434    if r == 0.0 {
1435        // φ(0) = pref · Γ(b) · κ^{-2b} · B(s−b, p),  B(x,y)=Γ(x)Γ(y)/Γ(x+y).
1436        let beta = gamma_lanczos(s - b) * gamma_lanczos(p) / gamma_lanczos(s - b + p);
1437        let value = pref * gamma_lanczos(b) * kappa.powf(-2.0 * b) * beta;
1438        if !value.is_finite() {
1439            crate::bail_invalid_basis!(
1440                "non-finite Duchon hybrid diagonal (stable form) for p={p_order}, s={s_order}, d={k_dim}"
1441            );
1442        }
1443        return Ok(value);
1444    }
1445    let mut acc = KahanSum::default();
1446    for &(w, weight) in gauss_legendre_01_64() {
1447        // Smooth term  2(B/A)^{b/2} K_b(2√(AB)) = 2 (r/(2κ√w))^b K_b(κ r √w).
1448        let sqrt_w = w.sqrt();
1449        let z = (kappa * r * sqrt_w).max(1e-300);
1450        let k_b = bessel_k_real_half_integer_or_integer(b.abs(), z)?;
1451        let smooth = 2.0 * (r / (2.0 * kappa * sqrt_w)).powf(b) * k_b;
1452        let factor = (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * smooth;
1453        acc.add(weight * factor);
1454    }
1455    let value = pref * acc.sum();
1456    if !value.is_finite() {
1457        crate::bail_invalid_basis!(
1458            "non-finite Duchon hybrid value (stable form) at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1459        );
1460    }
1461    Ok(value)
1462}
1463
1464/// Radial operator scalars `(q, t, t_r, t_rr)` of the hybrid Duchon–Matérn
1465/// kernel via the same cancellation-free single integral as
1466/// [`duchon_hybrid_kernel_stable_integral`], differentiated under the integral
1467/// sign (gam#1424 / gam#1453).
1468///
1469/// The partial-fraction operator core (`duchon_regularized_operator_core`)
1470/// assembles `q, t` as a sign-alternating sum of polyharmonic and Matérn
1471/// *operator* blocks. In high dimensions (e.g. d=16, p=1, s=9) each block is
1472/// ~1e3 while the true operator scalar is ~1e-13, so f64 loses every
1473/// significant digit — Kahan summation fixes accumulation, not the
1474/// cancellation between huge opposing terms, leaving `q, t` with ~1e-2 relative
1475/// noise. That floor sits above the Chebyshev profile certificate, so the
1476/// production profile cannot certify (gam#1453).
1477///
1478/// This routine instead differentiates the smooth per-`w` integrand
1479/// `g(r,w) = 2 (r/(2c))^b K_b(c r)`, `c = κ√w`, in `r`. Each `w`-slice is a
1480/// single well-conditioned `r^a K_ν(c r)` term whose `r`-derivatives are exact
1481/// (`d/dr[r^a K_ν(c r)] = a r^{a-1} K_ν(c r) − (c/2) r^a (K_{ν-1}+K_{ν+1})`),
1482/// so there is no cross-block cancellation. The radial derivatives `φ′…φ⁗`
1483/// are integrated against the same `(1-w)^{p-1} w^{s-1}` weight and the
1484/// 64-node Gauss–Legendre rule, then the operator scalars are assembled from
1485/// the standard radial relations
1486/// `q = φ′/r`, `t = q′/r`, `t_r = (q″−t)/r`, `t_rr = q‴/r − 2q″/r² + 2q′/r³`.
1487///
1488/// Requires the same precondition as the kernel form
1489/// ([`duchon_hybrid_stable_integral_applies`]) and `r > 0`.
1490pub(crate) fn duchon_hybrid_operator_stable_integral(
1491    r: f64,
1492    kappa: f64,
1493    p_order: usize,
1494    s_order: usize,
1495    k_dim: usize,
1496) -> Result<DuchonRegularizedOperatorCore, BasisError> {
1497    assert!(
1498        duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim),
1499        "duchon_hybrid_operator_stable_integral precondition violated: 2(p+s) > d and 2p < d required (p={p_order}, s={s_order}, d={k_dim})"
1500    );
1501    assert!(
1502        r > 0.0 && r.is_finite(),
1503        "duchon_hybrid_operator_stable_integral requires r > 0, got r={r}"
1504    );
1505    let p = p_order as f64;
1506    let s = s_order as f64;
1507    let half_d = 0.5 * k_dim as f64;
1508    let b = p + s - half_d;
1509    let pref = (4.0 * std::f64::consts::PI).powf(-half_d) / (gamma_lanczos(p) * gamma_lanczos(s));
1510
1511    // Accumulate φ′, φ″, φ‴, φ⁗ across the Gauss–Legendre nodes. (φ itself is
1512    // not needed for the operator scalars.)
1513    let mut d1 = KahanSum::default();
1514    let mut d2 = KahanSum::default();
1515    let mut d3 = KahanSum::default();
1516    let mut d4 = KahanSum::default();
1517
1518    for &(w, weight) in gauss_legendre_01_64() {
1519        let sqrt_w = w.sqrt();
1520        let c = (kappa * sqrt_w).max(1e-300);
1521        let z = (c * r).max(1e-300);
1522
1523        // Smooth integrand g(r) = A · r^b · K_b(c r),  A = 2 (2c)^{-b}.
1524        // Differentiate the symbolic term list (coef, a, ν-offset) in r:
1525        //   d/dr[c0 r^a K_{b+j}(c r)]
1526        //     = c0·a · r^{a-1} K_{b+j}(c r)
1527        //       − c0·(c/2) · r^a (K_{b+j-1}(c r) + K_{b+j+1}(c r)).
1528        // Four derivatives need ν-offsets in [-4, 4] around b.
1529        let a0 = 2.0 * (2.0 * c).powf(-b);
1530        let mut terms: Vec<(f64, f64, i32)> = vec![(a0, b, 0)];
1531        // Cache K_{b+j}(z) for j ∈ [-4, 4] (K is even in order → use |·|).
1532        let bessel = |j: i32| -> Result<f64, BasisError> {
1533            bessel_k_real_half_integer_or_integer((b + j as f64).abs(), z)
1534        };
1535        let evaluate = |terms: &Vec<(f64, f64, i32)>| -> Result<f64, BasisError> {
1536            let mut acc = KahanSum::default();
1537            for &(c0, a, j) in terms {
1538                if c0 == 0.0 {
1539                    continue;
1540                }
1541                acc.add(c0 * r.powf(a) * bessel(j)?);
1542            }
1543            Ok(acc.sum())
1544        };
1545
1546        let mut slice_derivs = [0.0_f64; 4];
1547        for slot in slice_derivs.iter_mut() {
1548            // Differentiate the current term list once.
1549            let mut next: Vec<(f64, f64, i32)> = Vec::with_capacity(terms.len() * 3);
1550            for &(c0, a, j) in &terms {
1551                if c0 == 0.0 {
1552                    continue;
1553                }
1554                if a != 0.0 {
1555                    next.push((c0 * a, a - 1.0, j));
1556                }
1557                let half = -c0 * c * 0.5;
1558                next.push((half, a, j - 1));
1559                next.push((half, a, j + 1));
1560            }
1561            terms = next;
1562            *slot = evaluate(&terms)?;
1563        }
1564
1565        d1.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[0]);
1566        d2.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[1]);
1567        d3.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[2]);
1568        d4.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[3]);
1569    }
1570
1571    let phi1 = pref * d1.sum();
1572    let phi2 = pref * d2.sum();
1573    let phi3 = pref * d3.sum();
1574    let phi4 = pref * d4.sum();
1575    if !(phi1.is_finite() && phi2.is_finite() && phi3.is_finite() && phi4.is_finite()) {
1576        crate::bail_invalid_basis!(
1577            "non-finite Duchon hybrid operator (stable form) at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1578        );
1579    }
1580
1581    // Assemble the operator scalars from the radial derivatives. For r > 0
1582    // these divisions are removable-singularity quotients of moderate
1583    // quantities (no cancellation between blocks remains).
1584    let inv_r = 1.0 / r;
1585    let q = phi1 * inv_r;
1586    // q′ = φ″/r − φ′/r²; q″ = φ‴/r − 2φ″/r² + 2φ′/r³;
1587    // q‴ = φ⁗/r − 3φ‴/r² + 6φ″/r³ − 6φ′/r⁴.
1588    let q_r = phi2 * inv_r - phi1 * inv_r * inv_r;
1589    let q_rr = phi3 * inv_r - 2.0 * phi2 * inv_r * inv_r + 2.0 * phi1 * inv_r * inv_r * inv_r;
1590    let q_rrr = phi4 * inv_r - 3.0 * phi3 * inv_r * inv_r + 6.0 * phi2 * inv_r * inv_r * inv_r
1591        - 6.0 * phi1 * inv_r * inv_r * inv_r * inv_r;
1592    let t = q_r * inv_r;
1593    let t_r = q_rr * inv_r - q_r * inv_r * inv_r;
1594    let t_rr = q_rrr * inv_r - 2.0 * q_rr * inv_r * inv_r + 2.0 * q_r * inv_r * inv_r * inv_r;
1595
1596    Ok(DuchonRegularizedOperatorCore { q, t, t_r, t_rr })
1597}
1598
1599/// Whether the cancellation-free [`duchon_hybrid_kernel_stable_integral`] is
1600/// applicable for these orders: a genuine Matérn blend (`s ≥ 1`) whose
1601/// single-integral reduction has an integrable `w → 0` endpoint (`2p < d`).
1602///
1603/// The complementary cases — `s = 0` (pure polyharmonic, already evaluated
1604/// directly with no cancellation) and `2p ≥ d` (only reachable at low `d`,
1605/// where the partial-fraction sum has no meaningful cancellation) — retain the
1606/// existing partial-fraction path.
1607#[inline]
1608pub(crate) fn duchon_hybrid_stable_integral_applies(
1609    p_order: usize,
1610    s_order: usize,
1611    k_dim: usize,
1612) -> bool {
1613    s_order >= 1 && 2 * p_order < k_dim
1614}
1615
1616pub(crate) fn duchon_matern_kernel_general_from_distance(
1617    r: f64,
1618    length_scale: Option<f64>,
1619    p_order: usize,
1620    s_order: usize,
1621    k_dim: usize,
1622    coeffs: Option<&DuchonPartialFractionCoeffs>,
1623) -> Result<f64, BasisError> {
1624    if !r.is_finite() || r < 0.0 {
1625        crate::bail_invalid_basis!("Duchon kernel distance must be finite and non-negative");
1626    }
1627    let Some(length_scale) = length_scale else {
1628        return Ok(polyharmonic_kernel(
1629            r,
1630            pure_duchon_block_order(p_order, s_order as f64),
1631            k_dim,
1632        ));
1633    };
1634    if !length_scale.is_finite() || length_scale <= 0.0 {
1635        crate::bail_invalid_basis!("Duchon hybrid length_scale must be finite and positive");
1636    }
1637    let kappa = 1.0 / length_scale;
1638
1639    // gam#1424: for genuine high-dimensional Matérn blends the partial-fraction
1640    // sum below cancels catastrophically (the largest block dwarfs the true
1641    // ~1e-13 kernel value, destroying every significant digit and the PSD
1642    // property of the Gram matrix). Evaluate those orders with the
1643    // cancellation-free single-integral form instead — it also handles the
1644    // `r = 0` diagonal in closed form, so it short-circuits before the
1645    // near-collision Taylor branch.
1646    if duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim) {
1647        return duchon_hybrid_kernel_stable_integral(r, kappa, p_order, s_order, k_dim);
1648    }
1649
1650    let coeffs_local;
1651    let coeffs_ref = if let Some(c) = coeffs {
1652        c
1653    } else {
1654        coeffs_local = duchon_partial_fraction_coeffs(p_order, s_order, kappa);
1655        &coeffs_local
1656    };
1657    let collision_taylor_radius = DUCHON_COLLISION_TAYLOR_REL * length_scale.max(1e-8);
1658    // The near-collision Taylor expansion uses phi(0) plus even-order
1659    // derivative collision limits. Those limits only exist when the kernel
1660    // is finite at the origin, i.e. when 2(p+s) > d. Below that threshold
1661    // the partial-fraction blocks individually diverge at r=0 but their
1662    // sum is still a well-defined function for any r > 0 (each Bessel-K
1663    // and r^{2m-d}-type block is finite away from origin). Fall through
1664    // to the direct sum in that regime; r=0 itself remains an error.
1665    let kernel_finite_at_origin = 2 * (p_order + s_order) > k_dim;
1666    if r <= collision_taylor_radius && kernel_finite_at_origin {
1667        return duchon_hybrid_kernel_near_collision_value(
1668            r,
1669            length_scale,
1670            p_order,
1671            s_order,
1672            k_dim,
1673            coeffs_ref,
1674        );
1675    }
1676    let mut val = KahanSum::default();
1677    for (m, coeff) in coeffs_ref.a.iter().enumerate().skip(1) {
1678        if *coeff == 0.0 {
1679            continue;
1680        }
1681        val.add(coeff * polyharmonic_kernel(r, (m) as f64, k_dim));
1682    }
1683    for (n, coeff) in coeffs_ref.b.iter().enumerate().skip(1) {
1684        if *coeff == 0.0 {
1685            continue;
1686        }
1687        val.add(coeff * duchon_matern_block(r, kappa, n, k_dim)?);
1688    }
1689    Ok(val.sum())
1690}
1691
1692pub(crate) fn duchon_hybrid_kernel_collision_value(
1693    length_scale: f64,
1694    p_order: usize,
1695    s_order: usize,
1696    k_dim: usize,
1697    coeffs: &DuchonPartialFractionCoeffs,
1698) -> Result<f64, BasisError> {
1699    let spectral_order = 2 * (p_order + s_order);
1700    if spectral_order <= k_dim {
1701        crate::bail_invalid_basis!(
1702            "Duchon hybrid diagonal is not finite when 2*(p+s) <= dimension; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1703        );
1704    }
1705
1706    let kappa = 1.0 / length_scale.max(1e-300);
1707    let mut pure = KahanSum::default();
1708    let mut log_part = KahanSum::default();
1709    for (m, &a_m) in coeffs.a.iter().enumerate().skip(1) {
1710        if a_m == 0.0 {
1711            continue;
1712        }
1713        let (block_pure, block_log) = duchon_polyharmonic_block_taylor_r2j(m, k_dim, 0);
1714        pure.add(a_m * block_pure);
1715        log_part.add(a_m * block_log);
1716    }
1717    for (n, &b_n) in coeffs.b.iter().enumerate().skip(1) {
1718        if b_n == 0.0 {
1719            continue;
1720        }
1721        let (block_pure, block_log) = duchon_matern_block_taylor_r2j(kappa, n, k_dim, 0);
1722        pure.add(b_n * block_pure);
1723        log_part.add(b_n * block_log);
1724    }
1725    let value = pure.sum();
1726    let log_value = log_part.sum();
1727    if log_value.abs() > 1e-8 * value.abs().max(1e-30) {
1728        crate::bail_invalid_basis!(
1729            "Duchon hybrid diagonal log terms did not cancel: log={log_value:.6e}, value={value:.6e}; p={p_order}, s={s_order}, d={k_dim}"
1730        );
1731    }
1732    if !value.is_finite() {
1733        crate::bail_invalid_basis!(
1734            "non-finite Duchon hybrid diagonal value for p={p_order}, s={s_order}, d={k_dim}"
1735        );
1736    }
1737    Ok(value)
1738}
1739
1740pub(crate) fn duchon_hybrid_kernel_near_collision_value(
1741    r: f64,
1742    length_scale: f64,
1743    p_order: usize,
1744    s_order: usize,
1745    k_dim: usize,
1746    coeffs: &DuchonPartialFractionCoeffs,
1747) -> Result<f64, BasisError> {
1748    let mut value =
1749        duchon_hybrid_kernel_collision_value(length_scale, p_order, s_order, k_dim, coeffs)?;
1750    if r == 0.0 {
1751        return Ok(value);
1752    }
1753
1754    // Radial Taylor expansion about the center collision:
1755    //
1756    //   phi(r) = phi(0)
1757    //          + phi''(0) r^2 / 2
1758    //          + phi''''(0) r^4 / 24
1759    //          + phi''''''(0) r^6 / 720 + ...
1760    //
1761    // Odd terms vanish for an isotropic radial kernel. A finite 2q-th
1762    // derivative at zero requires spectral smoothness 2(p+s) > d + 2q.
1763    // Terms whose collision derivative does not exist are omitted; this is
1764    // still strictly better than evaluating the raw partial-fraction sum at a
1765    // tiny nonzero radius, where large singular components cancel only after
1766    // losing many digits.
1767    let smoothness_order = 2 * (p_order + s_order);
1768    let r2 = r * r;
1769    if smoothness_order > k_dim + 2 {
1770        let (phi_rr, _, _) =
1771            duchonphi_rr_collision_psi_triplet(length_scale, p_order, s_order, k_dim, coeffs)?;
1772        value += 0.5 * phi_rr * r2;
1773    }
1774    if smoothness_order > k_dim + 4 {
1775        let phi_rrrr = duchon_phi_rrrr_collision(length_scale, p_order, s_order, k_dim, coeffs)?;
1776        value += (1.0 / 24.0) * phi_rrrr * r2 * r2;
1777    }
1778    if smoothness_order > k_dim + 6 {
1779        let phi_rrrrrr =
1780            duchon_phi_rrrrrr_collision(length_scale, p_order, s_order, k_dim, coeffs)?;
1781        value += (1.0 / 720.0) * phi_rrrrrr * r2 * r2 * r2;
1782    }
1783    if !value.is_finite() {
1784        crate::bail_invalid_basis!(
1785            "non-finite Duchon hybrid near-collision value at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1786        );
1787    }
1788    Ok(value)
1789}
1790
1791#[inline(always)]
1792pub(crate) fn stable_euclidean_norm<I>(components: I) -> f64
1793where
1794    I: IntoIterator<Item = f64>,
1795{
1796    let mut scale = 0.0_f64;
1797    let mut sumsq = 1.0_f64;
1798    let mut has_nonzero = false;
1799    for component in components {
1800        let abs = component.abs();
1801        if abs == 0.0 {
1802            continue;
1803        }
1804        if !abs.is_finite() {
1805            return f64::INFINITY;
1806        }
1807        if !has_nonzero {
1808            scale = abs;
1809            has_nonzero = true;
1810            continue;
1811        }
1812        if scale < abs {
1813            let ratio = scale / abs;
1814            sumsq = 1.0 + sumsq * ratio * ratio;
1815            scale = abs;
1816        } else {
1817            let ratio = abs / scale;
1818            sumsq += ratio * ratio;
1819        }
1820    }
1821    if has_nonzero {
1822        scale * sumsq.sqrt()
1823    } else {
1824        0.0
1825    }
1826}
1827
1828#[inline]
1829pub(crate) fn centered_aniso_log_scale_mean(eta: &[f64]) -> f64 {
1830    if eta.len() <= 1 {
1831        0.0
1832    } else {
1833        eta.iter().sum::<f64>() / eta.len() as f64
1834    }
1835}
1836
1837#[inline]
1838pub(crate) fn centered_aniso_log_scale(value: f64, mean: f64) -> f64 {
1839    // This bound exists solely to keep the downstream `.exp()` (axis scale and
1840    // metric weight) finite. `f64::clamp` leaves NaN as NaN, so a non-finite
1841    // contrast (e.g. an `inf − inf` from a degenerate anisotropy `eta`) would
1842    // slip through and poison the Gram matrix. Map any non-finite difference to
1843    // the saturating bound explicitly; finite inputs take the identical clamp.
1844    let centered = value - mean;
1845    if centered.is_finite() {
1846        centered.clamp(-50.0, 50.0)
1847    } else if centered > 0.0 {
1848        50.0
1849    } else {
1850        -50.0
1851    }
1852}
1853
1854#[inline]
1855pub(crate) fn aniso_axis_scale(value: f64, mean: f64) -> f64 {
1856    centered_aniso_log_scale(value, mean).exp()
1857}
1858
1859#[inline]
1860pub(crate) fn aniso_metric_weight(value: f64, mean: f64) -> f64 {
1861    (2.0 * centered_aniso_log_scale(value, mean)).exp()
1862}
1863
1864pub(crate) fn centered_aniso_metric_weights(eta: &[f64]) -> Vec<f64> {
1865    let mean = centered_aniso_log_scale_mean(eta);
1866    eta.iter()
1867        .map(|&value| aniso_metric_weight(value, mean))
1868        .collect()
1869}
1870
1871/// Compute anisotropic squared distance components and total distance.
1872///
1873/// This is the core of **geometric anisotropy**: a linear warp Λ = diag(κ_a)
1874/// turns ellipsoidal correlation contours into isotropic ones. Writing h = x − c,
1875/// z = Λh, the anisotropic distance is r = |z| = |Λh|.
1876///
1877/// We decompose Λ = κ · A where det(A) = 1, parameterized as
1878///   ψ_a = ψ̄ + η_a,   Σ η_a = 0
1879/// where ψ̄ is the global scale (existing scalar κ) and η_a are d−1 anisotropy
1880/// contrasts. This separates scale from shape and preserves the Duchon scaling
1881/// law φ(r;κ) = κ^δ H(κr) for the global part.
1882///
1883/// Given per-axis log-scales `eta`, the identifiable centered contrasts are
1884/// ψ_a = eta_a - mean(eta). The metric uses those contrasts so Σ_a ψ_a = 0
1885/// even when a caller passes an uncentered vector:
1886///
1887///   r = √( Σ_a exp(2·ψ_a) · (x_a - c_a)² )
1888///
1889/// Returns `(r, s_vec)` where `s_vec[a] = exp(2·ψ_a) · h_a²` is the
1890/// per-axis weighted squared displacement. These components are needed for
1891/// per-axis derivatives: `∂φ/∂ψ_a = q · s_a`.
1892///
1893/// The derivative chain through r gives:
1894///   ∇_ψ r      = s / r
1895///   ∇²_ψ r     = (2/r) Diag(s) − (1/r³) ss'
1896/// which is diagonal + rank-1, so Hessian-vector products are O(d).
1897#[inline]
1898pub(crate) fn aniso_distance_and_components(
1899    data_row: &[f64],
1900    center: &[f64],
1901    eta: &[f64],
1902) -> (f64, Vec<f64>) {
1903    assert_eq!(data_row.len(), center.len());
1904    assert_eq!(data_row.len(), eta.len());
1905    let d = data_row.len();
1906    let eta_mean = centered_aniso_log_scale_mean(eta);
1907    let mut s_vec = Vec::with_capacity(d);
1908    let mut scaled_components = Vec::with_capacity(d);
1909    for a in 0..d {
1910        let h_a = data_row[a] - center[a];
1911        // Clamp exp(2ψ) to avoid overflow/underflow: ψ in [-50, 50].
1912        let scale_a = aniso_axis_scale(eta[a], eta_mean);
1913        let scaled_h_a = scale_a * h_a;
1914        let s_a = scaled_h_a * scaled_h_a;
1915        scaled_components.push(scaled_h_a);
1916        s_vec.push(s_a);
1917    }
1918    (stable_euclidean_norm(scaled_components), s_vec)
1919}
1920
1921/// Compute anisotropic distance without returning per-axis components.
1922///
1923/// This is the lightweight version of [`aniso_distance_and_components`] for
1924/// call sites that only need the scalar distance `r`.
1925#[inline]
1926pub(crate) fn aniso_distance(data_row: &[f64], center: &[f64], eta: &[f64]) -> f64 {
1927    assert_eq!(data_row.len(), center.len());
1928    assert_eq!(data_row.len(), eta.len());
1929    let eta_mean = centered_aniso_log_scale_mean(eta);
1930    stable_euclidean_norm(
1931        (0..data_row.len()).map(|a| aniso_axis_scale(eta[a], eta_mean) * (data_row[a] - center[a])),
1932    )
1933}
1934
1935#[inline(always)]
1936pub(crate) fn euclidean_distance_rows(
1937    lhs: ArrayView2<'_, f64>,
1938    lhs_row: usize,
1939    rhs: ArrayView2<'_, f64>,
1940    rhs_row: usize,
1941) -> f64 {
1942    assert_eq!(lhs.ncols(), rhs.ncols());
1943    stable_euclidean_norm((0..lhs.ncols()).map(|axis| lhs[[lhs_row, axis]] - rhs[[rhs_row, axis]]))
1944}
1945
1946#[inline(always)]
1947pub(crate) fn aniso_axis_scales(eta: &[f64]) -> Vec<f64> {
1948    let eta_mean = centered_aniso_log_scale_mean(eta);
1949    eta.iter()
1950        .map(|&value| aniso_axis_scale(value, eta_mean))
1951        .collect()
1952}
1953
1954#[inline(always)]
1955pub(crate) fn aniso_distance_rows_with_scales(
1956    lhs: ArrayView2<'_, f64>,
1957    lhs_row: usize,
1958    rhs: ArrayView2<'_, f64>,
1959    rhs_row: usize,
1960    axis_scales: &[f64],
1961) -> f64 {
1962    assert_eq!(lhs.ncols(), rhs.ncols());
1963    assert_eq!(lhs.ncols(), axis_scales.len());
1964    stable_euclidean_norm(
1965        (0..lhs.ncols())
1966            .map(|axis| axis_scales[axis] * (lhs[[lhs_row, axis]] - rhs[[rhs_row, axis]])),
1967    )
1968}
1969
1970pub(crate) fn fill_symmetric_from_row_kernel<F>(
1971    matrix: &mut Array2<f64>,
1972    kernel: F,
1973) -> Result<(), BasisError>
1974where
1975    F: Fn(usize, usize) -> Result<f64, BasisError> + Sync,
1976{
1977    assert_eq!(matrix.nrows(), matrix.ncols());
1978    let k = matrix.nrows();
1979    // The kernels passed here are pure functions of the (symmetric) pairwise
1980    // center distance, so `kernel(i, j) == kernel(j, i)`. Evaluate only the
1981    // upper triangle (including the diagonal) in parallel — each row task
1982    // touches only its own `j >= i` cells, so the borrows stay disjoint — then
1983    // mirror into the lower triangle. This halves the (sqrt + special-function)
1984    // kernel evaluations relative to filling every cell independently, with no
1985    // change to the resulting matrix (still exactly symmetric).
1986    matrix
1987        .axis_iter_mut(Axis(0))
1988        .into_par_iter()
1989        .enumerate()
1990        .try_for_each(|(i, mut row)| {
1991            for j in i..k {
1992                row[j] = kernel(i, j)?;
1993            }
1994            Ok::<(), BasisError>(())
1995        })?;
1996    for i in 1..k {
1997        for j in 0..i {
1998            matrix[[i, j]] = matrix[[j, i]];
1999        }
2000    }
2001    Ok(())
2002}
2003
2004/// Return y-space points `y_{i,a} = exp(ψ_a) x_{i,a}` with
2005/// `ψ_a = η_a - mean(η)` so Euclidean pairwise
2006/// distances in y equal anisotropic kernel distances in x:
2007///   |y_i - y_j|² = Σ_a exp(2 ψ_a) (x_{i,a} - x_{j,a})² = aniso_distance²(x_i, x_j, η).
2008/// Use this before `pairwise_distance_bounds` whenever κ conditioning
2009/// bounds must match the kernel's actual metric (anisotropic case). For
2010/// isotropic terms, pass `None` and keep using the raw centers.
2011pub(crate) fn points_in_aniso_y_space(points: ArrayView2<'_, f64>, eta: &[f64]) -> Array2<f64> {
2012    assert_eq!(points.ncols(), eta.len());
2013    let mut y = points.to_owned();
2014    let eta_mean = centered_aniso_log_scale_mean(eta);
2015    let weights: Vec<f64> = eta.iter().map(|&e| aniso_axis_scale(e, eta_mean)).collect();
2016    for a in 0..eta.len() {
2017        let w_a = weights[a];
2018        y.column_mut(a).mapv_inplace(|v| v * w_a);
2019    }
2020    y
2021}
2022
2023/// Compute per-axis standard deviations of knot center coordinates.
2024///
2025/// Returns σ_a for each axis column of `centers`. Axes with zero variance
2026/// (constant column) get σ_a = 1.0. All values are clamped to [1e-6, 1e6].
2027pub fn knot_cloud_axis_scales(centers: ArrayView2<'_, f64>) -> Vec<f64> {
2028    let k = centers.nrows();
2029    let d = centers.ncols();
2030    if k < 2 || d == 0 {
2031        return vec![1.0; d];
2032    }
2033    let n = k as f64;
2034    let mut scales = Vec::with_capacity(d);
2035    for a in 0..d {
2036        let col = centers.column(a);
2037        let mean = col.sum() / n;
2038        let var = col.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / (n - 1.0);
2039        let sigma = var.sqrt();
2040        // If variance is zero (constant column), use 1.0 (no scaling).
2041        let sigma = if sigma < 1e-12 { 1.0 } else { sigma };
2042        scales.push(sigma.clamp(1e-6, 1e6));
2043    }
2044    scales
2045}
2046
2047/// Compute initial anisotropy contrasts η_a from knot center geometry.
2048///
2049/// Returns η_a = −ln(σ_a) + (1/d) Σ_b ln(σ_b), which satisfies Ση_a = 0
2050/// by construction. Axes with more spread get negative η_a (smaller κ_a,
2051/// longer correlation range), axes with less spread get positive η_a.
2052///
2053/// If d ≤ 1, returns an empty vector (anisotropy is meaningless for 1-D).
2054pub fn initial_aniso_contrasts(centers: ArrayView2<'_, f64>) -> Vec<f64> {
2055    let d = centers.ncols();
2056    if d <= 1 {
2057        return Vec::new();
2058    }
2059    let scales = knot_cloud_axis_scales(centers);
2060    let mean_neg_log: f64 = scales.iter().map(|&s| -s.ln()).sum::<f64>() / d as f64;
2061    // η_a = −ln(σ_a) + (1/d) Σ_b ln(σ_b)
2062    //     = −ln(σ_a) − mean(−ln(σ_b))
2063    //     = neg_log_scales[a] − mean(neg_log_scales)
2064    scales
2065        .iter()
2066        .map(|&scale| -scale.ln() - mean_neg_log)
2067        .collect()
2068}
2069
2070/// Pure forward transform of the supplied anisotropy log-scales: subtract the
2071/// mean (so Σ η = 0) and zero tiny residuals. `None` (or a 1-D problem, where
2072/// centering is a no-op) means *no* anisotropy.
2073///
2074/// This is a **continuous function of η with no hidden data dependence**: an
2075/// explicit all-zero vector centers to all-zero, i.e. the isotropic metric
2076/// (weights `exp(2·0) = 1`, Euclidean radius). It is therefore identical, as a
2077/// design, to the `None` path through `η = 0`, and is continuous across it —
2078/// `[1e-9, -1e-9]` and `[0, 0]` map to neighboring designs, not a jump.
2079///
2080/// The Matérn input-location jet/Hessian (`matern_metric_weights`, the public
2081/// `matern_input_location_first_jet`/`_hessian` FFI) and the `UserProvided`-center
2082/// forward design both apply *this* transform, so the jet differentiates exactly
2083/// the function the public design evaluates (#437), and an explicit isotropic
2084/// request reduces to the closed-form isotropic Matérn kernel rather than a
2085/// data-driven anisotropic one (#1042).
2086///
2087/// Auto-initialization of `η` from knot-cloud geometry is a *separate* concern
2088/// handled by [`auto_seed_aniso_contrasts`]; it is reserved for callers that
2089/// opt into data-derived geometry (the κ-optimizer's data-driven center
2090/// strategies and the pure-Duchon `scale_dims` path), selected by
2091/// [`resolve_matern_forward_aniso`].
2092pub(crate) fn centered_aniso_contrasts(aniso: Option<&[f64]>) -> Option<Vec<f64>> {
2093    match aniso {
2094        Some(v) if v.len() > 1 => Some(center_aniso_log_scales(v)),
2095        Some(v) => Some(v.to_vec()),
2096        None => None,
2097    }
2098}
2099
2100/// Auto-seed anisotropy contrasts from knot-cloud geometry for callers that use
2101/// an all-zero vector as the "initialize me" sentinel.
2102///
2103/// Used by (a) the pure-Duchon `scale_dims` path, where `η` is a FIXED,
2104/// geometry-derived basis parameter that is never enrolled as a REML hyper-axis
2105/// (see `spatial_term_supports_hyper_optimization`): "standardize the geometry,
2106/// then learn the smoothness"; and (b) the Matérn forward design when the term
2107/// uses a **data-driven** center strategy, i.e. the κ-optimizer's seeding
2108/// sentinel (the optimizer's analytic ψ-gradient is computed against the same
2109/// auto-seeded design, so the pair stays consistent). A non-zero (or absent)
2110/// vector is honored verbatim (centered, exactly like [`centered_aniso_contrasts`]);
2111/// only an *exactly* all-zero vector is replaced by `initial_aniso_contrasts(centers)`.
2112///
2113/// A `UserProvided`-center Matérn term does NOT use this — its geometry is fully
2114/// caller-specified, so an explicit all-zero η must be honored literally; folding
2115/// the geometry seed into that path made the public design discontinuous at
2116/// `η = 0` and hijacked explicit isotropic requests (#1042).
2117pub(crate) fn auto_seed_aniso_contrasts(
2118    centers: ArrayView2<'_, f64>,
2119    aniso: Option<&[f64]>,
2120) -> Option<Vec<f64>> {
2121    let eta = match aniso {
2122        Some(v) if v.len() > 1 => v,
2123        Some(v) => return Some(v.to_vec()),
2124        None => return None,
2125    };
2126    let all_zero = eta.iter().all(|&e| e == 0.0);
2127    if !all_zero {
2128        return Some(center_aniso_log_scales(eta));
2129    }
2130    let contrasts = initial_aniso_contrasts(centers);
2131    if contrasts.is_empty() {
2132        Some(center_aniso_log_scales(eta))
2133    } else {
2134        Some(center_aniso_log_scales(&contrasts))
2135    }
2136}
2137
2138fn center_aniso_log_scales(eta: &[f64]) -> Vec<f64> {
2139    if eta.len() <= 1 {
2140        return eta.to_vec();
2141    }
2142    let mean = eta.iter().sum::<f64>() / eta.len() as f64;
2143    eta.iter()
2144        .map(|&v| {
2145            let centered = v - mean;
2146            if centered.abs() <= 1e-15 {
2147                0.0
2148            } else {
2149                centered
2150            }
2151        })
2152        .collect()
2153}
2154
2155/// How the Matérn forward design build interprets an *exactly all-zero*
2156/// `aniso_log_scales` vector.
2157#[derive(Clone, Copy, Debug, PartialEq, Eq)]
2158pub enum AnisoSeedMode {
2159    /// All-zero `η` is the κ-optimizer / `scale_dims` seeding sentinel: replace
2160    /// it with geometry-derived contrasts from the knot cloud
2161    /// ([`auto_seed_aniso_contrasts`]). This is the default for every internal
2162    /// build entry; the optimizer's analytic ψ-gradient is computed against the
2163    /// same auto-seeded design, so value/gradient stay consistent. Note that by
2164    /// the time the κ-optimizer rebuilds a frozen design the center strategy has
2165    /// usually been resolved to `UserProvided`, so center provenance cannot be
2166    /// used to distinguish this from a genuine literal request — the mode must
2167    /// be carried explicitly.
2168    AutoSeedFromGeometry,
2169    /// All-zero `η` is an explicit isotropic request and is honored literally
2170    /// ([`centered_aniso_contrasts`]): the design reduces to the closed-form
2171    /// isotropic Matérn and varies continuously through `η = 0`. The public
2172    /// `matern_basis` FFI (and its input-location jet/Hessian) selects this so a
2173    /// caller's explicit isotropic request is not hijacked into a data-driven
2174    /// anisotropic kernel (#1042).
2175    Literal,
2176}
2177
2178/// Resolve the anisotropy contrasts the Matérn forward design build applies,
2179/// dispatching on the explicit [`AnisoSeedMode`].
2180pub(crate) fn resolve_matern_forward_aniso(
2181    mode: AnisoSeedMode,
2182    centers: ArrayView2<'_, f64>,
2183    aniso: Option<&[f64]>,
2184) -> Option<Vec<f64>> {
2185    match mode {
2186        AnisoSeedMode::Literal => centered_aniso_contrasts(aniso),
2187        AnisoSeedMode::AutoSeedFromGeometry => auto_seed_aniso_contrasts(centers, aniso),
2188    }
2189}
2190
2191pub(crate) fn pairwise_distance_bounds(points: ArrayView2<'_, f64>) -> Option<(f64, f64)> {
2192    let n = points.nrows();
2193    let d = points.ncols();
2194    if n < 2 || d == 0 {
2195        return None;
2196    }
2197    let mut r_min = f64::INFINITY;
2198    let mut r_max = 0.0_f64;
2199    for i in 0..n {
2200        for j in (i + 1)..n {
2201            let r = stable_euclidean_norm((0..d).map(|c| points[[i, c]] - points[[j, c]]));
2202            if r.is_finite() && r > 0.0 {
2203                r_min = r_min.min(r);
2204                r_max = r_max.max(r);
2205            }
2206        }
2207    }
2208    if r_min.is_finite() && r_max.is_finite() && r_min > 0.0 && r_max > 0.0 {
2209        Some((r_min, r_max))
2210    } else {
2211        None
2212    }
2213}
2214
2215/// Capped-sample pairwise distance bounds for large point clouds.
2216///
2217/// Returns `(r_min_hat, r_max_hat)` such that:
2218/// - `r_max_hat <= true r_max`  (pairwise max over a sub-sample is monotone
2219///    in the sample, so the sampled max underestimates the true max).
2220/// - `r_min_hat >= true r_min`  (pairwise min over a sub-sample can only
2221///    exclude some pairs, so the sampled min overestimates the true min).
2222///
2223/// Both approximations are conservative for κ-bound derivation:
2224///   kappa_lo = 1e-2 / r_max_hat  >=  1e-2 / true r_max  (wider window, low κ)
2225///   kappa_hi = 1e2  / r_min_hat  <=  1e2  / true r_min  (tighter window, high κ)
2226/// so no feasible κ that the exact bound would include is excluded by the
2227/// approximation — it can only slightly shrink the high-κ tail, which is
2228/// exactly the regime (κ → ∞ ⇒ degenerate kernel) that we want the outer
2229/// optimizer to avoid anyway.
2230///
2231/// Sampling picks `K = 1024` indices spaced evenly across the FULL index range
2232/// `[0, n-1]` (endpoints included): `idx(s) = round(s·(n-1)/(K-1))`. For a cap
2233/// of `K = 1024` and n up to ~10⁹ this yields O(K²·d) work per call — a few
2234/// hundred μs. For n ≤ K the exact pairwise is used.
2235///
2236/// #1033: spanning the full range (rather than the contiguous prefix `0,
2237/// stride, …, (K-1)·stride` that an integer `stride = n/K` produces — which
2238/// floors to `1` for every n in `(K, 2K]` and so visits only indices `0..K`,
2239/// dropping the entire tail of the cloud) is what makes the diameter estimate
2240/// `r_max_hat` n-STABLE. The κ/ψ window is derived once from `r_max_hat`
2241/// (`psi_lo = ln(diameter_fraction / r_max_hat)`); a prefix-sampled `r_max_hat`
2242/// shrinks with the prefix's spatial extent as n crosses `K`, which silently
2243/// makes the outer optimizer's box — and therefore its whole trajectory —
2244/// n-dependent. Even spacing keeps `r_max_hat` ≈ the true cloud diameter at
2245/// every n, so the sufficient-statistic outer loop touches the same ψ window
2246/// regardless of n.
2247pub(crate) fn pairwise_distance_bounds_sampled(points: ArrayView2<'_, f64>) -> Option<(f64, f64)> {
2248    const K_CAP: usize = 1024;
2249    let n = points.nrows();
2250    let d = points.ncols();
2251    if n < 2 || d == 0 {
2252        return None;
2253    }
2254    if n <= K_CAP {
2255        return pairwise_distance_bounds(points);
2256    }
2257    // Evenly spaced indices spanning `[0, n-1]` inclusive (n > K_CAP ⇒ k = K_CAP
2258    // ≥ 2, so the denominator is positive and `idx(0)=0`, `idx(k-1)=n-1`). The
2259    // spacing `(n-1)/(k-1) > 1`, so distinct `s` map to distinct rows; any rare
2260    // rounding collision is harmless (the `r > 0.0` guard drops a zero pair).
2261    let k = K_CAP;
2262    let denom = (k - 1) as f64;
2263    let span = (n - 1) as f64;
2264    let sample_index = |s: usize| -> usize { ((s as f64) * span / denom).round() as usize };
2265    let mut r_min = f64::INFINITY;
2266    let mut r_max = 0.0_f64;
2267    for i_idx in 0..k {
2268        let i = sample_index(i_idx);
2269        for j_idx in (i_idx + 1)..k {
2270            let j = sample_index(j_idx);
2271            let r = stable_euclidean_norm((0..d).map(|c| points[[i, c]] - points[[j, c]]));
2272            if r.is_finite() && r > 0.0 {
2273                r_min = r_min.min(r);
2274                r_max = r_max.max(r);
2275            }
2276        }
2277    }
2278    if r_min.is_finite() && r_max.is_finite() && r_min > 0.0 && r_max > 0.0 {
2279        Some((r_min, r_max))
2280    } else {
2281        None
2282    }
2283}
2284
2285#[cfg(test)]
2286mod duchon_hybrid_psd_tests {
2287    use super::*;
2288    use faer::Side;
2289    use gam_linalg::faer_ndarray::FaerEigh;
2290
2291    /// #1033: the capped-sample diameter estimate must be n-STABLE on a fixed
2292    /// point cloud. A uniform grid on `[-3, 3]` has a fixed true diameter (6.0)
2293    /// and minimum spacing that shrinks like `6/(n-1)` regardless of how finely
2294    /// it is sampled. The κ/ψ window is derived ONCE from `r_max_hat`, so if the
2295    /// sampler underestimates the diameter as n crosses the `K_CAP = 1024`
2296    /// threshold (the old `stride = n/K` prefix bug visited only indices
2297    /// `0..K`, i.e. the LEFT HALF of the domain for n in `(1024, 2048]`,
2298    /// halving `r_max_hat`), the outer optimizer's box — and hence its whole
2299    /// trajectory — becomes n-dependent, which is exactly the invariant #1033
2300    /// forbids. This pins `r_max_hat ≈ 6.0` across the threshold.
2301    #[test]
2302    fn sampled_diameter_is_n_stable_across_cap_threshold() {
2303        let grid = |n: usize| -> Array2<f64> {
2304            let mut x = Array2::<f64>::zeros((n, 1));
2305            for i in 0..n {
2306                x[[i, 0]] = (i as f64) / (n as f64 - 1.0) * 6.0 - 3.0;
2307            }
2308            x
2309        };
2310        // Below the cap (exact), straddling it, and well above it.
2311        let exact_diam = 6.0_f64;
2312        let mut last_rmax: Option<f64> = None;
2313        for &n in &[1000usize, 1025, 1500, 2000, 4000, 50_000] {
2314            let x = grid(n);
2315            let (r_min, r_max) =
2316                pairwise_distance_bounds_sampled(x.view()).expect("bounds for dense grid");
2317            // The sampled diameter must stay within 1% of the true 6.0 at every
2318            // n — NOT collapse to ~3.0 as the prefix bug did for n in (1024,2048].
2319            assert!(
2320                (r_max - exact_diam).abs() <= 0.01 * exact_diam,
2321                "sampled r_max at n={n} = {r_max:.6} drifted from the true diameter \
2322                 {exact_diam:.6}: the diameter estimate is n-dependent (#1033)"
2323            );
2324            // Cross-n stability: consecutive n's must agree on r_max to <2%.
2325            if let Some(prev) = last_rmax {
2326                let rel = (r_max - prev).abs() / exact_diam;
2327                assert!(
2328                    rel <= 0.02,
2329                    "sampled r_max jumped {rel:.4} (rel) between n steps near n={n}: \
2330                     {prev:.6} -> {r_max:.6}; outer-loop box is not n-stable (#1033)"
2331                );
2332            }
2333            last_rmax = Some(r_max);
2334            // r_min is positive and finite (the floor used for the high-κ ceiling).
2335            assert!(
2336                r_min.is_finite() && r_min > 0.0,
2337                "r_min must be positive at n={n}"
2338            );
2339        }
2340    }
2341
2342    /// The sampler's chosen indices must span the FULL range `[0, n-1]`
2343    /// (endpoints included) — the property that makes the diameter estimate
2344    /// stable. Reconstruct the index set the implementation uses and assert it
2345    /// reaches both ends with no contiguous-prefix clustering.
2346    #[test]
2347    fn sampled_indices_span_full_range() {
2348        const K_CAP: usize = 1024;
2349        let n = 2000usize; // stride = n/K_CAP would floor to 1 → prefix bug regime
2350        let k = K_CAP;
2351        let denom = (k - 1) as f64;
2352        let span = (n - 1) as f64;
2353        let idx = |s: usize| -> usize { ((s as f64) * span / denom).round() as usize };
2354        assert_eq!(idx(0), 0, "first sample must be index 0");
2355        assert_eq!(idx(k - 1), n - 1, "last sample must be the final index n-1");
2356        // The largest gap between consecutive samples must be ≈ (n-1)/(k-1),
2357        // i.e. roughly 2 here — NOT a single dense prefix followed by a void.
2358        let mut max_gap = 0usize;
2359        for s in 1..k {
2360            max_gap = max_gap.max(idx(s) - idx(s - 1));
2361        }
2362        assert!(
2363            max_gap <= 2,
2364            "evenly-spaced samples should step by ~{:.2}; saw a gap of {max_gap} \
2365             (prefix clustering would leave one huge gap)",
2366            span / denom
2367        );
2368    }
2369
2370    /// Deterministic, well-separated centers on `[-1, 1]^d` (a Halton-style
2371    /// low-discrepancy lattice over the radical-inverse base sequence). Mirrors
2372    /// the `4*d` random centers the Python fixture
2373    /// (`tests/test_python_api.py`'s high-dimensional hybrid Duchon penalty PSD
2374    /// check) draws, but without an RNG so the regression is byte-stable.
2375    fn fixture_centers(d: usize, n: usize) -> Array2<f64> {
2376        const BASES: [u64; 24] = [
2377            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
2378            89,
2379        ];
2380        let mut centers = Array2::<f64>::zeros((n, d));
2381        for i in 0..n {
2382            for axis in 0..d {
2383                let base = BASES[axis % BASES.len()];
2384                // Van der Corput radical inverse of (i + 1) in `base`, mapped to
2385                // [-1, 1]. Different axes use different primes, so the cloud is
2386                // affinely full-rank and spans the linear null space.
2387                let mut f = 1.0_f64;
2388                let mut idx = (i + 1) as u64;
2389                let mut value = 0.0_f64;
2390                while idx > 0 {
2391                    f /= base as f64;
2392                    value += f * (idx % base) as f64;
2393                    idx /= base;
2394                }
2395                centers[[i, axis]] = 2.0 * value - 1.0;
2396            }
2397        }
2398        centers
2399    }
2400
2401    /// Smallest symmetric eigenvalue of `matrix` (the matrix is symmetrized
2402    /// first; the constrained Duchon penalty is symmetric by construction).
2403    fn lambda_min(matrix: &Array2<f64>) -> f64 {
2404        let sym = symmetrize_penalty(matrix);
2405        let (evals, _) = FaerEigh::eigh(&sym, Side::Lower).expect("symmetric eigendecomposition");
2406        evals.iter().copied().fold(f64::INFINITY, f64::min)
2407    }
2408
2409    /// gam#1424: the (d=16, m=2, s=7) hybrid Duchon–Matérn fixture used to lose
2410    /// positive definiteness through catastrophic cancellation in the
2411    /// partial-fraction kernel expansion — the constrained, post-normalization
2412    /// penalty had λ_min ≈ −0.26442 even though the kernel's spectral density
2413    /// `ρ^{-2p}(κ²+ρ²)^{-s}` is nonnegative (so the true penalty is PSD). The
2414    /// kernel now routes through the cancellation-free single-integral form, so
2415    /// the spectrum is numerically PSD. This mirrors the production penalty path
2416    /// `duchon_constrained_bending_penalty` → `normalize_penalty`.
2417    #[test]
2418    fn high_dim_hybrid_penalty_is_numerically_psd_1424() {
2419        let d = 16usize;
2420        // m=2 ⇒ Linear null space. The cubic default spectral power is the
2421        // fractional (d-1)/2 = 7.5; the production hybrid config resolves it to
2422        // the integer spectral order the closed-form kernel consumes, s = 7
2423        // (`duchon_constrained_bending_penalty` itself takes the integer view via
2424        // `duchon_power_to_usize`, and the reroute predicate needs s ≥ 1). This is
2425        // the (d=16, m=2, s=7) fixture from the issue and the Python
2426        // `duchon_function_norm_penalty` PSD test.
2427        let (nullspace_order, default_power) = duchon_cubic_default(d);
2428        assert!(matches!(nullspace_order, DuchonNullspaceOrder::Linear));
2429        assert!(
2430            (default_power - 7.5).abs() < 1e-12,
2431            "cubic-default power for d=16 is 7.5"
2432        );
2433        let power = 7.0_f64;
2434        assert_eq!(duchon_power_to_usize(power), 7);
2435        // The reroute must engage for this fixture (s = 7 ≥ 1, 2p = 4 < d = 16).
2436        assert!(duchon_hybrid_stable_integral_applies(
2437            duchon_p_from_nullspace_order(nullspace_order),
2438            duchon_power_to_usize(power),
2439            d,
2440        ));
2441        let length_scale = Some(1.0_f64);
2442        let centers = fixture_centers(d, 4 * d);
2443
2444        let mut cache = BasisCacheContext::default();
2445        let z = kernel_constraint_nullspace(centers.view(), nullspace_order, &mut cache)
2446            .expect("constraint null space");
2447
2448        let omega = duchon_constrained_bending_penalty(
2449            centers.view(),
2450            length_scale,
2451            power,
2452            nullspace_order,
2453            None,
2454            &z,
2455        )
2456        .expect("constrained bending penalty assembles for the hybrid fixture");
2457        let (penalty, _scale) = normalize_penalty(&omega);
2458
2459        let lam_min = lambda_min(&penalty);
2460        assert!(
2461            lam_min >= -1e-10,
2462            "gam#1424: (d=16, m=2, s=7) hybrid penalty is not numerically PSD: \
2463             λ_min={lam_min:.6e} (was ≈ −0.26442 with the cancellation-prone \
2464             partial-fraction kernel)"
2465        );
2466    }
2467
2468    /// gam#1604: independent closed form for the hybrid-kernel origin value
2469    /// `φ(0) = F⁻¹[ρ^{-2p}(κ²+ρ²)^{-s}](0)` in `d` dimensions, derived by
2470    /// Schwinger-parametrizing both rational factors and evaluating the radial
2471    /// inverse-FT integral at `r = 0`:
2472    ///
2473    ///   φ(0) = (4π)^{-d/2} / Γ(s) · Γ(b) · κ^{-2b} · Γ(d/2 − p) / Γ(d/2),
2474    ///   b = p + s − d/2.
2475    ///
2476    /// Finite whenever `2(p+s) > d` and (for the Γ(d/2 − p) factor to avoid a
2477    /// pole) `d` is odd or `2p < d`. This reuses none of the Taylor-coefficient
2478    /// machinery under test, so it is a true oracle for the collision diagonal.
2479    fn phi0_closed_form(p: usize, s: usize, d: usize, kappa: f64) -> f64 {
2480        let half_d = 0.5 * d as f64;
2481        let b = p as f64 + s as f64 - half_d;
2482        (4.0 * std::f64::consts::PI).powf(-half_d) / gamma_lanczos(s as f64)
2483            * gamma_lanczos(b)
2484            * kappa.powf(-2.0 * b)
2485            * gamma_lanczos(half_d - p as f64)
2486            / gamma_lanczos(half_d)
2487    }
2488
2489    /// gam#1604 — the collision (r = 0) diagonal of the hybrid Duchon–Matérn
2490    /// kernel must equal the independent closed form above. The half-integer-ν
2491    /// Taylor coefficients that feed `duchon_hybrid_kernel_collision_value`
2492    /// previously miscounted the K_{l+½} polynomial degree (`l = 2|ν| − 1`
2493    /// instead of `|ν| − ½`), zeroing the r⁰ term of every |ν| ≥ 3/2 block and
2494    /// silently dropping their contribution to φ(0).
2495    #[test]
2496    fn hybrid_collision_diagonal_matches_closed_form_1604() {
2497        // Odd dimensions exercise the half-integer-ν path. For each, sweep p, s
2498        // (with 2(p+s) > d) and κ. d = 1, n ≥ 2 ⇒ ν ≥ 3/2 is the regressed case.
2499        for &d in &[1usize, 3, 5] {
2500            for &p in &[1usize, 2, 3] {
2501                for &s in &[1usize, 2, 3, 4] {
2502                    if 2 * (p + s) <= d {
2503                        continue;
2504                    }
2505                    for &kappa in &[0.5f64, 1.0, 2.5] {
2506                        let coeffs = duchon_partial_fraction_coeffs(p, s, kappa);
2507                        let got =
2508                            duchon_hybrid_kernel_collision_value(1.0 / kappa, p, s, d, &coeffs)
2509                                .expect("collision diagonal");
2510                        let want = phi0_closed_form(p, s, d, kappa);
2511                        let rel = (got - want).abs() / want.abs().max(1e-300);
2512                        assert!(
2513                            rel < 1e-10,
2514                            "φ(0) mismatch d={d} p={p} s={s} κ={kappa}: got {got:.12e}, want {want:.12e} (rel {rel:.2e})"
2515                        );
2516                    }
2517                }
2518            }
2519        }
2520    }
2521
2522    /// gam#1604 — the near-collision Taylor branch must be continuous with the
2523    /// direct partial-fraction sum: assembling φ(r) from φ(0), φ″(0), φ⁗(0),
2524    /// φ⁽⁶⁾(0) (all built from the same half-integer-ν Taylor coefficients) must
2525    /// match the cancellation-free direct block sum at a small radius where both
2526    /// are individually accurate. This exercises the j ≥ 1 coefficients (the
2527    /// diagonal test only pins j = 0).
2528    #[test]
2529    fn hybrid_near_collision_continuous_with_direct_1604() {
2530        for &d in &[1usize, 3] {
2531            for &p in &[1usize, 2] {
2532                for &s in &[2usize, 3] {
2533                    if 2 * (p + s) <= d + 6 {
2534                        // Need φ⁽⁶⁾(0) to exist for the full 6th-order Taylor.
2535                        continue;
2536                    }
2537                    for &kappa in &[0.5f64, 1.0, 2.0] {
2538                        let length_scale = 1.0 / kappa;
2539                        let coeffs = duchon_partial_fraction_coeffs(p, s, kappa);
2540                        // r small enough that the truncated 6th-order Taylor is
2541                        // accurate to ~r⁸, yet large enough that the direct block
2542                        // sum has not lost precision (d = 1/3, moderate κ).
2543                        let r = 0.02 * length_scale;
2544                        let taylor = duchon_hybrid_kernel_near_collision_value(
2545                            r,
2546                            length_scale,
2547                            p,
2548                            s,
2549                            d,
2550                            &coeffs,
2551                        )
2552                        .expect("near-collision value");
2553                        // Direct partial-fraction sum (real Bessel-K, no Taylor).
2554                        let mut direct = 0.0f64;
2555                        for (m, &a_m) in coeffs.a.iter().enumerate().skip(1) {
2556                            if a_m != 0.0 {
2557                                direct += a_m * polyharmonic_kernel(r, m as f64, d);
2558                            }
2559                        }
2560                        for (n, &b_n) in coeffs.b.iter().enumerate().skip(1) {
2561                            if b_n != 0.0 {
2562                                direct += b_n
2563                                    * duchon_matern_block(r, kappa, n, d).expect("matern block");
2564                            }
2565                        }
2566                        let rel = (taylor - direct).abs() / direct.abs().max(1e-300);
2567                        assert!(
2568                            rel < 1e-9,
2569                            "near-collision vs direct mismatch d={d} p={p} s={s} κ={kappa} r={r}: \
2570                             taylor {taylor:.12e}, direct {direct:.12e} (rel {rel:.2e})"
2571                        );
2572                    }
2573                }
2574            }
2575        }
2576    }
2577
2578    /// gam#1604 — the production constrained Duchon penalty `Ω_c = α²·ZᵀK_CC Z`
2579    /// for a `d = 1` hybrid smooth with power ≥ 2 must be numerically PSD across
2580    /// realistic length scales. Before the Taylor-degree fix the corrupted
2581    /// diagonal made `Ω_c ≈ Ω_true − δ·I` (δ = the dropped diagonal mass),
2582    /// giving λ_min ≈ −δ < 0 at *every* length scale — the issue's report.
2583    #[test]
2584    fn d1_hybrid_penalty_is_psd_1604() {
2585        let d = 1usize;
2586        let nullspace_order = DuchonNullspaceOrder::Linear; // p = 2
2587        let centers = fixture_centers(d, 12);
2588        let mut cache = BasisCacheContext::default();
2589        let z = kernel_constraint_nullspace(centers.view(), nullspace_order, &mut cache)
2590            .expect("constraint null space");
2591        for &power in &[2.0f64, 3.0] {
2592            for &length_scale in &[0.5f64, 1.0, 10.0, 100.0] {
2593                let omega = duchon_constrained_bending_penalty(
2594                    centers.view(),
2595                    Some(length_scale),
2596                    power,
2597                    nullspace_order,
2598                    None,
2599                    &z,
2600                )
2601                .unwrap_or_else(|e| {
2602                    panic!("d=1 p=2 s={power} ls={length_scale} penalty rejected: {e}")
2603                });
2604                let (penalty, _scale) = normalize_penalty(&omega);
2605                let lam_min = lambda_min(&penalty);
2606                assert!(
2607                    lam_min >= -1e-9,
2608                    "d=1 p=2 s={power} ls={length_scale}: λ_min={lam_min:.6e} (not PSD)"
2609                );
2610            }
2611        }
2612    }
2613
2614    /// No-regression guard: a well-conditioned low-dimensional fixture must keep
2615    /// the exact kernel VALUES the partial-fraction path produced before the
2616    /// gam#1424 fix. For d=2 the stable-integral reroute does not apply
2617    /// (`2p=4 ≥ d=2`), so `duchon_matern_kernel_general_from_distance` still runs
2618    /// the original sum verbatim; pinning it against an independent direct
2619    /// evaluation of the same partial-fraction blocks proves the production
2620    /// routing is unchanged for low `d`.
2621    #[test]
2622    fn low_dim_hybrid_kernel_values_unchanged_1424() {
2623        let d = 2usize;
2624        let p_order = 2usize; // Linear null space (m=2)
2625        let s_order = 2usize;
2626        let kappa = 1.0_f64;
2627        let length_scale = Some(1.0_f64);
2628        // The d=2 case is NOT rerouted to the stable integral.
2629        assert!(!duchon_hybrid_stable_integral_applies(p_order, s_order, d));
2630        let coeffs = duchon_partial_fraction_coeffs(p_order, s_order, kappa);
2631
2632        for &r in &[0.25_f64, 0.75, 1.5] {
2633            // Independent reference: the raw partial-fraction sum
2634            // Σ a_m·r^{2m-d}(·log) + Σ b_n·matern_block, identical in form to the
2635            // production direct-sum branch but assembled here from scratch.
2636            let mut reference = 0.0_f64;
2637            for (m, &coeff) in coeffs.a.iter().enumerate().skip(1) {
2638                if coeff != 0.0 {
2639                    reference += coeff * polyharmonic_kernel(r, m as f64, d);
2640                }
2641            }
2642            for (n, &coeff) in coeffs.b.iter().enumerate().skip(1) {
2643                if coeff != 0.0 {
2644                    reference += coeff * duchon_matern_block(r, kappa, n, d).expect("matern block");
2645                }
2646            }
2647
2648            let got = duchon_matern_kernel_general_from_distance(
2649                r,
2650                length_scale,
2651                p_order,
2652                s_order,
2653                d,
2654                Some(&coeffs),
2655            )
2656            .expect("low-d hybrid kernel value");
2657            assert!(
2658                (got - reference).abs() <= 1e-10,
2659                "low-d hybrid kernel value regressed at r={r}: got {got:.15e}, reference {reference:.15e}"
2660            );
2661        }
2662    }
2663}