Skip to main content

gam_terms/basis/
duchon_kernel_math.rs

1use super::*;
2
3pub fn build_duchon_collocation_operator_matrices(
4    centers: ArrayView2<'_, f64>,
5    collocationweights: Option<ArrayView1<'_, f64>>,
6    length_scale: Option<f64>,
7    power: f64,
8    nullspace_order: DuchonNullspaceOrder,
9    aniso_log_scales: Option<&[f64]>,
10    identifiability_transform: Option<ArrayView2<'_, f64>>,
11    max_operator_derivative_order: usize,
12) -> Result<CollocationOperatorMatrices, BasisError> {
13    let mut workspace = BasisWorkspace::default();
14    build_duchon_collocation_operator_matriceswithworkspace(
15        centers,
16        centers,
17        collocationweights,
18        length_scale,
19        power,
20        nullspace_order,
21        aniso_log_scales,
22        identifiability_transform,
23        max_operator_derivative_order,
24        &mut workspace,
25    )
26}
27
28pub fn build_duchon_operator_penalty_matrices(
29    centers: ArrayView2<'_, f64>,
30    collocationweights: Option<ArrayView1<'_, f64>>,
31    length_scale: Option<f64>,
32    power: f64,
33    nullspace_order: DuchonNullspaceOrder,
34    aniso_log_scales: Option<&[f64]>,
35    identifiability_transform: Option<ArrayView2<'_, f64>>,
36) -> Result<DuchonOperatorPenaltyMatrices, BasisError> {
37    let ops = build_duchon_collocation_operator_matrices(
38        centers,
39        collocationweights,
40        length_scale,
41        power,
42        nullspace_order,
43        aniso_log_scales,
44        identifiability_transform,
45        2,
46    )?;
47    let (mass, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d0)));
48    let (tension, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d1)));
49    let (stiffness, _) = normalize_penalty(&symmetrize(&fast_ata(&ops.d2)));
50    Ok(DuchonOperatorPenaltyMatrices {
51        mass,
52        tension,
53        stiffness,
54    })
55}
56
57pub fn build_thin_plate_penalty_matrix(
58    centers: ArrayView2<'_, f64>,
59    length_scale: f64,
60) -> Result<ThinPlatePenaltyMatrix, BasisError> {
61    let mut workspace = BasisWorkspace::default();
62    let kernel_transform = thin_plate_kernel_constraint_nullspace(centers, &mut workspace.cache)?;
63    let (penalty, _) =
64        build_thin_plate_penalty_matrices(centers, length_scale, &kernel_transform, false)?;
65    let (penalty, _) = normalize_penalty(&penalty);
66    Ok(ThinPlatePenaltyMatrix { penalty })
67}
68
69pub fn build_duchon_collocation_operator_matriceswithworkspace(
70    centers: ArrayView2<'_, f64>,
71    collocation_points: ArrayView2<'_, f64>,
72    collocationweights: Option<ArrayView1<'_, f64>>,
73    length_scale: Option<f64>,
74    power: f64,
75    nullspace_order: DuchonNullspaceOrder,
76    aniso_log_scales: Option<&[f64]>,
77    identifiability_transform: Option<ArrayView2<'_, f64>>,
78    max_operator_derivative_order: usize,
79    workspace: &mut BasisWorkspace,
80) -> Result<CollocationOperatorMatrices, BasisError> {
81    // The operator design rows are the COLLOCATION points (a density-blind,
82    // space-filling sample of the data support); the columns are the `k` basis
83    // CENTERS. Decoupling them is what makes the operator penalty a faithful
84    // quadrature of `∫‖Dᵠf‖²` (collocating at the `k` centers themselves — the
85    // old `collocation_points == centers` special case — under-samples a
86    // `k`-bump basis and is what made these penalties explode).
87    let nullspace_order = duchon_effective_nullspace_order(centers, nullspace_order);
88    // Auto-raise the null-space order (p) so the pointwise kernel and every
89    // active derivative-collocation operator clear their well-posedness margin
90    // `2(p + s) > d + max_op` BEFORE the guard in
91    // `validate_duchon_collocation_orders` can fire. Mirrors the auto-degrade
92    // above; only `p` is lifted, so the spectral power and CPD condition are
93    // untouched. See `duchon_order_for_operator_margin`.
94    let nullspace_order = duchon_order_for_operator_margin(
95        centers.ncols(),
96        power,
97        nullspace_order,
98        max_operator_derivative_order,
99    );
100    let p_order = duchon_p_from_nullspace_order(nullspace_order);
101    let s_order: f64 = power;
102    let p_colloc = collocation_points.nrows();
103    let n_basis = centers.nrows();
104    let dim = centers.ncols();
105    if collocation_points.ncols() != dim {
106        crate::bail_dim_basis!(
107            "collocation points dim {} != centers dim {dim}",
108            collocation_points.ncols()
109        );
110    }
111    validate_duchon_collocation_orders(
112        length_scale,
113        p_order,
114        s_order,
115        dim,
116        max_operator_derivative_order,
117    )?;
118    if let Some(eta) = aniso_log_scales
119        && eta.len() != dim
120    {
121        crate::bail_dim_basis!(
122            "Duchon anisotropy dimension mismatch: got {}, expected {dim}",
123            eta.len()
124        );
125    }
126    // Partial-fraction expansion only runs in the hybrid Matérn branch
127    // (`length_scale = Some`). The scale-free path (`length_scale = None`)
128    // skips it entirely and is fractional-clean down to the Riesz kernel.
129    let coeffs = length_scale.map(|scale| {
130        let s_int = duchon_power_to_usize(s_order);
131        duchon_partial_fraction_coeffs(p_order, s_int, 1.0 / scale.max(1e-300))
132    });
133    let metric_weights: Option<Vec<f64>> = aniso_log_scales.map(centered_aniso_metric_weights);
134    let row_scales = if let Some(w) = collocationweights {
135        if w.len() != p_colloc {
136            crate::bail_dim_basis!(
137                "collocation weight length mismatch: got {}, expected {p_colloc}",
138                w.len()
139            );
140        }
141        let mut out = Vec::with_capacity(p_colloc);
142        for &wk in w {
143            if !wk.is_finite() || wk < 0.0 {
144                crate::bail_invalid_basis!(
145                    "collocation weights must be finite and non-negative; got {wk}"
146                );
147            }
148            out.push(wk.sqrt());
149        }
150        out
151    } else {
152        vec![1.0; p_colloc]
153    };
154    let z = kernel_constraint_nullspace(centers, nullspace_order, &mut workspace.cache)?;
155    // D0/D1/D2 rows = collocation points (`p_colloc`), columns = basis centers
156    // (`n_basis`). Gradients/Hessians are taken w.r.t. the EVALUATION point
157    // (the collocation row), so `delta = collocation - center`. No symmetry: the
158    // two point sets differ in general.
159    // Skip the costly higher-derivative designs the caller doesn't need: mass
160    // (D0) + tension (D1) build with `max_op = 1`, so the `O(d²)`-row Hessian
161    // (D2) is never allocated or filled — decisive in high `d`.
162    let build_d1 = max_operator_derivative_order >= 1;
163    let build_d2 = max_operator_derivative_order >= 2;
164    let mut d0_raw = Array2::<f64>::zeros((p_colloc, n_basis));
165    let mut d1_raw = Array2::<f64>::zeros((if build_d1 { p_colloc * dim } else { 0 }, n_basis));
166    let mut d2_raw =
167        Array2::<f64>::zeros((if build_d2 { p_colloc * dim * dim } else { 0 }, n_basis));
168    const R_EPS: f64 = 1e-10;
169    for i in 0..p_colloc {
170        let scale_i = row_scales[i];
171        for j in 0..n_basis {
172            let r = if let Some(eta) = aniso_log_scales {
173                let row_i: Vec<f64> = (0..dim).map(|a| collocation_points[[i, a]]).collect();
174                let row_j: Vec<f64> = (0..dim).map(|a| centers[[j, a]]).collect();
175                aniso_distance(&row_i, &row_j, eta)
176            } else {
177                stable_euclidean_norm(
178                    (0..dim).map(|axis| collocation_points[[i, axis]] - centers[[j, axis]]),
179                )
180            };
181            // Floor coincident collocation/center pairs off the kernel's origin
182            // singularity: a farthest-point sample can land exactly on a center.
183            // The gradient/Hessian limits at r→0 are the zeros the `r > R_EPS`
184            // guards below already produce, so flooring only avoids the log-case
185            // `r²·log r` second-derivative blow-up at exact r=0.
186            let r = r.max(R_EPS);
187            let (phi, q, t) = if let (Some(length_scale), Some(coeffs)) =
188                (length_scale, coeffs.as_ref())
189            {
190                let jets =
191                    duchon_radial_jets(r, length_scale, p_order, s_order as usize, dim, coeffs)?;
192                (jets.phi, jets.q, jets.t)
193            } else {
194                let (phi, phi_r, phi_rr) = duchon_kernel_radial_triplet(
195                    r,
196                    length_scale,
197                    p_order,
198                    s_order,
199                    dim,
200                    coeffs.as_ref(),
201                )?;
202                let q = if r > R_EPS { phi_r / r } else { phi_rr };
203                let t = if r > R_EPS {
204                    (phi_rr - q) / (r * r)
205                } else {
206                    0.0
207                };
208                (phi, q, t)
209            };
210            if !phi.is_finite() || !q.is_finite() || !t.is_finite() {
211                crate::bail_invalid_basis!(
212                    "non-finite Duchon collocation operator derivative at (colloc {i}, center {j}), r={r}"
213                );
214            }
215            d0_raw[[i, j]] = scale_i * phi;
216            if build_d2 {
217                for axis_a in 0..dim {
218                    let h_a = collocation_points[[i, axis_a]] - centers[[j, axis_a]];
219                    let w_a = metric_weights
220                        .as_ref()
221                        .map(|weights| weights[axis_a])
222                        .unwrap_or(1.0);
223                    for axis_b in 0..dim {
224                        let h_b = collocation_points[[i, axis_b]] - centers[[j, axis_b]];
225                        let w_b = metric_weights
226                            .as_ref()
227                            .map(|weights| weights[axis_b])
228                            .unwrap_or(1.0);
229                        let diagonal = if axis_a == axis_b { q * w_a } else { 0.0 };
230                        let mixed = if r > R_EPS {
231                            t * w_a * h_a * w_b * h_b
232                        } else {
233                            0.0
234                        };
235                        let value = diagonal + mixed;
236                        let row_i = (i * dim + axis_a) * dim + axis_b;
237                        d2_raw[[row_i, j]] = scale_i * value;
238                    }
239                }
240            }
241            if build_d1 && r > R_EPS {
242                for axis in 0..dim {
243                    let delta = collocation_points[[i, axis]] - centers[[j, axis]];
244                    let axis_scale = metric_weights
245                        .as_ref()
246                        .map(|weights| weights[axis])
247                        .unwrap_or(1.0);
248                    d1_raw[[i * dim + axis, j]] = scale_i * q * axis_scale * delta;
249                }
250            }
251        }
252    }
253    let d0_kernel = fast_ab(&d0_raw, &z);
254    let poly = polynomial_block_from_order(centers, nullspace_order);
255    let poly_collocation = polynomial_block_from_order(collocation_points, nullspace_order);
256    let poly_d1 = if build_d1 {
257        polynomial_derivative_block(collocation_points, nullspace_order, 1)
258    } else {
259        Array2::<f64>::zeros((0, poly.ncols()))
260    };
261    let poly_d2 = if build_d2 {
262        polynomial_derivative_block(collocation_points, nullspace_order, 2)
263    } else {
264        Array2::<f64>::zeros((0, poly.ncols()))
265    };
266    let kernel_cols = d0_kernel.ncols();
267    let poly_cols = poly.ncols();
268    let total_cols = kernel_cols + poly_cols;
269    // The operator matrices act on the SAME coefficient basis as the emitted
270    // design: constrained radial columns followed by explicit polynomial
271    // null-space columns.  The lower-order Hilbert-scale penalties are function
272    // penalties, not just radial-kernel penalties, so the polynomial block must
273    // be evaluated/differentiated at the collocation sites too: D0 sees the
274    // polynomial value, D1 its gradient, and D2 its Hessian. Orders the caller
275    // skipped stay empty (0 rows).
276    let mut d0 = Array2::<f64>::zeros((p_colloc, total_cols));
277    d0.slice_mut(s![.., 0..kernel_cols]).assign(&d0_kernel);
278    d0.slice_mut(s![.., kernel_cols..total_cols])
279        .assign(&poly_collocation);
280    let mut d1 = Array2::<f64>::zeros((if build_d1 { p_colloc * dim } else { 0 }, total_cols));
281    if build_d1 {
282        d1.slice_mut(s![.., 0..kernel_cols])
283            .assign(&fast_ab(&d1_raw, &z));
284        d1.slice_mut(s![.., kernel_cols..total_cols])
285            .assign(&poly_d1);
286    }
287    let mut d2 =
288        Array2::<f64>::zeros((if build_d2 { p_colloc * dim * dim } else { 0 }, total_cols));
289    if build_d2 {
290        d2.slice_mut(s![.., 0..kernel_cols])
291            .assign(&fast_ab(&d2_raw, &z));
292        d2.slice_mut(s![.., kernel_cols..total_cols])
293            .assign(&poly_d2);
294    }
295    if let Some(z) = identifiability_transform {
296        let z = z.to_owned();
297        d0 = fast_ab(&d0, &z);
298        d1 = fast_ab(&d1, &z);
299        d2 = fast_ab(&d2, &z);
300    }
301    Ok(CollocationOperatorMatrices {
302        d0,
303        d1,
304        d2,
305        collocation_points: collocation_points.to_owned(),
306        kernel_nullspace_transform: Some(z),
307        polynomial_block_cols: poly_cols,
308    })
309}
310
311fn polynomial_derivative_block(
312    points: ArrayView2<'_, f64>,
313    order: DuchonNullspaceOrder,
314    derivative_order: usize,
315) -> Array2<f64> {
316    let n = points.nrows();
317    let d = points.ncols();
318    let degree = match order {
319        DuchonNullspaceOrder::Zero => 0,
320        DuchonNullspaceOrder::Linear => 1,
321        DuchonNullspaceOrder::Degree(degree) => degree,
322    };
323    let exponents = monomial_exponents(d, degree);
324    match derivative_order {
325        1 => {
326            let mut block = Array2::<f64>::zeros((n * d, exponents.len()));
327            for row in 0..n {
328                for axis in 0..d {
329                    let out_row = row * d + axis;
330                    for (col, exps) in exponents.iter().enumerate() {
331                        block[[out_row, col]] = monomial_derivative_value(points, row, exps, axis);
332                    }
333                }
334            }
335            block
336        }
337        2 => {
338            let mut block = Array2::<f64>::zeros((n * d * d, exponents.len()));
339            for row in 0..n {
340                for axis_a in 0..d {
341                    for axis_b in 0..d {
342                        let out_row = (row * d + axis_a) * d + axis_b;
343                        for (col, exps) in exponents.iter().enumerate() {
344                            block[[out_row, col]] =
345                                monomial_second_derivative_value(points, row, exps, axis_a, axis_b);
346                        }
347                    }
348                }
349            }
350            block
351        }
352        _ => Array2::<f64>::zeros((0, exponents.len())),
353    }
354}
355
356fn monomial_derivative_value(
357    points: ArrayView2<'_, f64>,
358    row: usize,
359    exponents: &[usize],
360    axis: usize,
361) -> f64 {
362    let exponent = exponents[axis];
363    if exponent == 0 {
364        return 0.0;
365    }
366    let mut value = exponent as f64;
367    for a in 0..points.ncols() {
368        let power = exponents[a] - usize::from(a == axis);
369        if power != 0 {
370            value *= points[[row, a]].powi(power as i32);
371        }
372    }
373    value
374}
375
376fn monomial_second_derivative_value(
377    points: ArrayView2<'_, f64>,
378    row: usize,
379    exponents: &[usize],
380    axis_a: usize,
381    axis_b: usize,
382) -> f64 {
383    let coeff = if axis_a == axis_b {
384        let exponent = exponents[axis_a];
385        if exponent < 2 {
386            return 0.0;
387        }
388        (exponent * (exponent - 1)) as f64
389    } else {
390        let exponent_a = exponents[axis_a];
391        let exponent_b = exponents[axis_b];
392        if exponent_a == 0 || exponent_b == 0 {
393            return 0.0;
394        }
395        (exponent_a * exponent_b) as f64
396    };
397    let mut value = coeff;
398    for axis in 0..points.ncols() {
399        let consumed = usize::from(axis == axis_a) + usize::from(axis == axis_b);
400        let power = exponents[axis] - consumed;
401        if power != 0 {
402            value *= points[[row, axis]].powi(power as i32);
403        }
404    }
405    value
406}
407
408#[inline(always)]
409pub(crate) fn bessel_k0_stable(x: f64) -> f64 {
410    let x_pos = x.max(1e-300);
411    if x_pos <= 2.0 {
412        return bessel_k0_small_series(x_pos);
413    }
414    let y = 2.0 / x_pos;
415    (-x_pos).exp() / x_pos.sqrt()
416        * (1.253_314_14
417            + y * (-0.078_323_58
418                + y * (0.021_895_68
419                    + y * (-0.010_624_46
420                        + y * (0.005_878_72 + y * (-0.002_515_40 + y * 0.000_532_08))))))
421}
422
423#[inline(always)]
424pub(crate) fn bessel_k1_stable(x: f64) -> f64 {
425    let x_pos = x.max(1e-300);
426    if x_pos <= 2.0 {
427        return bessel_k1_small_series(x_pos);
428    }
429    let y = 2.0 / x_pos;
430    (-x_pos).exp() / x_pos.sqrt()
431        * (1.253_314_14
432            + y * (0.234_986_19
433                + y * (-0.036_556_20
434                    + y * (0.015_042_68
435                        + y * (-0.007_803_53 + y * (0.003_256_14 + y * -0.000_682_45))))))
436}
437
438#[inline(always)]
439pub(crate) fn bessel_k0_k1_small_series(x: f64) -> (f64, f64) {
440    const EULER_GAMMA: f64 = 0.577_215_664_901_532_9;
441    let y = 0.25 * x * x;
442    let log_half_plus_gamma = 0.5 * y.ln() + EULER_GAMMA;
443    let mut i0 = 1.0;
444    let mut i1 = 0.5 * x;
445    let mut harmonic = 0.0;
446    let mut y_power_over_fact_sq = 1.0;
447    let mut k0_series = 0.0;
448    let mut k0_series_y_derivative_times_y = 0.0;
449    for k in 1..=256 {
450        let kf = k as f64;
451        harmonic += 1.0 / kf;
452        y_power_over_fact_sq *= y / (kf * kf);
453        let k0_term = harmonic * y_power_over_fact_sq;
454        k0_series += k0_term;
455        k0_series_y_derivative_times_y += kf * k0_term;
456        i0 += y_power_over_fact_sq;
457        i1 += 0.5 * x * y_power_over_fact_sq / (kf + 1.0);
458        if k0_term.abs() <= f64::EPSILON * i0.abs().max(k0_series.abs()).max(1.0) {
459            break;
460        }
461    }
462
463    let k0 = -log_half_plus_gamma * i0 + k0_series;
464    let k1 = i0 / x + log_half_plus_gamma * i1 - (2.0 / x) * k0_series_y_derivative_times_y;
465    (k0, k1)
466}
467
468#[inline(always)]
469pub(crate) fn bessel_k0_small_series(x: f64) -> f64 {
470    bessel_k0_k1_small_series(x).0
471}
472
473#[inline(always)]
474pub(crate) fn bessel_k1_small_series(x: f64) -> f64 {
475    bessel_k0_k1_small_series(x).1
476}
477
478pub(crate) const DUCHON_DERIVATIVE_R_FLOOR_REL: f64 = 1e-5;
479
480pub(crate) const DUCHON_COLLISION_TAYLOR_REL: f64 = 1e-4;
481
482/// Minimum `(row, center)` pair count before a radial design sweep builds a
483/// certified [`radial_profile::RadialProfile`] instead of evaluating every
484/// pair exactly. The profile build costs a few hundred exact jet
485/// evaluations, so it only pays for itself when the sweep reuses it well
486/// beyond that; below the threshold the exact path keeps small fits
487/// bit-identical to the pre-profile behavior.
488pub(crate) const RADIAL_PROFILE_MIN_PAIRS: usize = 16_384;
489
490#[inline(always)]
491pub(crate) fn duchon_p_from_nullspace_order(order: DuchonNullspaceOrder) -> usize {
492    match order {
493        // Duchon null spaces contain all polynomials of degree < m.
494        // The public `order` knob chooses that polynomial degree cutoff:
495        //   order=0 -> constants only  -> m=1
496        //   order=1 -> constants+linear -> m=2
497        DuchonNullspaceOrder::Zero => 1,
498        DuchonNullspaceOrder::Linear => 2,
499        DuchonNullspaceOrder::Degree(degree) => degree + 1,
500    }
501}
502
503/// Returns the effective Duchon null-space order, auto-degrading when the
504/// requested order leaves no radial kernel degrees of freedom.
505///
506/// The constrained kernel block has `centers.nrows() - rank(P)` columns, where
507/// `P` is the polynomial null-space block. A valid polynomial block with
508/// exactly as many centers as columns is still useless for smoothing: every
509/// center is consumed by the side constraints and the design collapses to the
510/// polynomial tail. Degrade to the highest lower null-space order with at
511/// least one constrained kernel column.
512pub(crate) fn duchon_effective_nullspace_order(
513    centers: ArrayView2<'_, f64>,
514    order: DuchonNullspaceOrder,
515) -> DuchonNullspaceOrder {
516    if order == DuchonNullspaceOrder::Zero {
517        return order;
518    }
519    let mut effective = order;
520    while effective != DuchonNullspaceOrder::Zero
521        && centers.nrows() <= polynomial_block_from_order(centers, effective).ncols()
522    {
523        effective = duchon_previous_nullspace_order(effective);
524    }
525    if effective != order {
526        // Dedup: warn only once per (rows, cols, requested_order) per process.
527        // BFGS × P-IRLS × derivative callsites hit this path many times.
528        static SEEN: std::sync::OnceLock<
529            std::sync::Mutex<std::collections::HashSet<(usize, usize, DuchonNullspaceOrder)>>,
530        > = std::sync::OnceLock::new();
531        let seen = SEEN.get_or_init(|| std::sync::Mutex::new(std::collections::HashSet::new()));
532        let key = (centers.nrows(), centers.ncols(), order);
533        let fresh = seen.lock().map(|mut s| s.insert(key)).unwrap_or(true);
534        if fresh {
535            let requested_cols = polynomial_block_from_order(centers, order).ncols();
536            let effective_cols = polynomial_block_from_order(centers, effective).ncols();
537            log::warn!(
538                "Duchon nullspace order={:?} in dim={} with {} centers leaves no radial kernel columns (polynomial_cols={}); degrading to {:?} (polynomial_cols={})",
539                order,
540                centers.ncols(),
541                centers.nrows(),
542                requested_cols,
543                effective,
544                effective_cols
545            );
546        }
547    }
548    effective
549}
550
551/// Auto-*raise* the Duchon null-space order so the polyharmonic kernel — and
552/// any active derivative-collocation operators — clear their pointwise
553/// well-posedness margin `2(p + s) > dimension + max_operator_derivative_order`
554/// *before* the hard guard in [`validate_duchon_collocation_orders`] can fire.
555///
556/// This is the escalating twin of [`duchon_effective_nullspace_order`], which
557/// auto-*degrades* the order when too few centers remain to leave any radial
558/// kernel columns. Here the failure mode is the opposite end: a low
559/// order/power pair in dimension `dim` (e.g. `d=2`, `Linear` ⇒ `p=2`, `s=0`
560/// with stiffness/D2 active) leaves the kernel value — or its `k`-th
561/// derivative collocation — divergent at the origin, so `2(p + s) ≤ d + k`
562/// trips the guard mid-fit. Lifting `p` (the null-space order) by the smallest
563/// amount that restores the strict margin makes the guard unreachable for
564/// otherwise valid-intent configs.
565///
566/// Only `p` is lifted; the spectral power `s` and the CPD condition `2s < d`
567/// (which involves `s` and `d` alone) are untouched, so raising `p` can never
568/// invalidate a config that the requested power already satisfied.
569///
570/// `max_operator_derivative_order` is the max derivative order among the
571/// *active* operators (0 = mass/pointwise, 1 = tension/D1, 2 = stiffness/D2),
572/// exactly the value threaded into [`validate_duchon_collocation_orders`].
573pub(crate) fn duchon_order_for_operator_margin(
574    dim: usize,
575    power: f64,
576    order: DuchonNullspaceOrder,
577    max_operator_derivative_order: usize,
578) -> DuchonNullspaceOrder {
579    let margin = dim as f64 + max_operator_derivative_order as f64;
580    let mut effective = order;
581    // 2(p + s) > margin  ⇔  p > margin/2 − s. Each escalation lifts `p` by 1, so
582    // `2(p + s)` grows by 2 per step and the loop is bounded by ⌈margin/2⌉.
583    while 2.0 * (duchon_p_from_nullspace_order(effective) as f64 + power) <= margin {
584        effective = duchon_next_nullspace_order(effective);
585    }
586    if effective != order {
587        // Dedup: warn only once per (dim, power, requested_order, max_op) per
588        // process — the escalation is hit from many rebuild callsites.
589        static SEEN: std::sync::OnceLock<
590            std::sync::Mutex<std::collections::HashSet<(usize, u64, DuchonNullspaceOrder, usize)>>,
591        > = std::sync::OnceLock::new();
592        let seen = SEEN.get_or_init(|| std::sync::Mutex::new(std::collections::HashSet::new()));
593        let key = (dim, power.to_bits(), order, max_operator_derivative_order);
594        let fresh = seen.lock().map(|mut s| s.insert(key)).unwrap_or(true);
595        if fresh {
596            log::warn!(
597                "Duchon nullspace order={:?} with power={} in dim={} leaves 2*(p+s)={} \
598                 below the pointwise/collocation margin dimension+{}={} required by the \
599                 active operators; auto-raising to {:?} so the kernel stays well-posed",
600                order,
601                power,
602                dim,
603                2.0 * (duchon_p_from_nullspace_order(order) as f64 + power),
604                max_operator_derivative_order,
605                margin,
606                effective,
607            );
608        }
609    }
610    effective
611}
612
613#[inline(always)]
614pub(crate) fn gamma_lanczos(x: f64) -> f64 {
615    // Numerical Recipes / Lanczos approximation with reflection formula.
616    const G: f64 = 7.0;
617    const P: [f64; 9] = [
618        0.999_999_999_999_809_9,
619        676.520_368_121_885_1,
620        -1_259.139_216_722_402_8,
621        771.323_428_777_653_1,
622        -176.615_029_162_140_6,
623        12.507_343_278_686_905,
624        -0.138_571_095_265_720_12,
625        9.984_369_578_019_571e-6,
626        1.505_632_735_149_311_6e-7,
627    ];
628    if x < 0.5 {
629        let pix = std::f64::consts::PI * x;
630        return std::f64::consts::PI / (pix.sin() * gamma_lanczos(1.0 - x));
631    }
632    let z = x - 1.0;
633    let mut a = P[0];
634    for (i, coeff) in P.iter().enumerate().skip(1) {
635        a += coeff / (z + i as f64);
636    }
637    let t = z + G + 0.5;
638    (2.0 * std::f64::consts::PI).sqrt() * t.powf(z + 0.5) * (-t).exp() * a
639}
640
641#[inline(always)]
642pub(crate) fn bessel_k_integer_order(n: usize, z: f64) -> f64 {
643    let zz = z.max(1e-300);
644    if n == 0 {
645        return bessel_k0_stable(zz);
646    }
647    if n == 1 {
648        return bessel_k1_stable(zz);
649    }
650    let mut km1 = bessel_k0_stable(zz);
651    let mut k = bessel_k1_stable(zz);
652    for m in 1..n {
653        let kp1 = km1 + 2.0 * (m as f64) * k / zz;
654        km1 = k;
655        k = kp1;
656    }
657    k
658}
659
660#[inline(always)]
661pub(crate) fn bessel_k_half_integer_order(l: usize, z: f64) -> f64 {
662    // Exact closed-form seeds and the stable upward recurrence
663    //   K_{1/2}(z) = sqrt(π/(2z))·e^{−z},
664    //   K_{3/2}(z) = K_{1/2}(z)·(1 + 1/z),
665    //   K_{ν+1}(z) = K_{ν−1}(z) + (2ν/z)·K_ν(z)   (ν = 1/2 + m, m ≥ 1).
666    // Equivalent to the closed-form polynomial sum, but uses EXACT integer
667    // coefficients via the recurrence instead of approximate Lanczos-gamma
668    // values for `c_j = (l+j)!/(j!(l−j)!)`. The Lanczos approximation is
669    // accurate to ~1 ULP at integer arguments; that error gets amplified
670    // through catastrophic cancellation in derivative lattices of the
671    // r^μ·K_μ(κr) family. Matching the [`BesselKLadder`] arithmetic byte-
672    // for-byte also ensures the ladder/per-call paths agree exactly.
673    let zz = z.max(1e-300);
674    let k_half = (std::f64::consts::PI / (2.0 * zz)).sqrt() * (-zz).exp();
675    if l == 0 {
676        return k_half;
677    }
678    let mut km1 = k_half;
679    let mut k = k_half * (1.0 + 1.0 / zz);
680    for m in 1..l {
681        let nu = m as f64 + 0.5;
682        let kp1 = km1 + 2.0 * nu * k / zz;
683        km1 = k;
684        k = kp1;
685    }
686    k
687}
688
689#[inline(always)]
690pub(crate) fn bessel_k_real_half_integer_or_integer(
691    nu_abs: f64,
692    z: f64,
693) -> Result<f64, BasisError> {
694    let two_nu = (2.0 * nu_abs).round();
695    if (two_nu - 2.0 * nu_abs).abs() > 1e-12 {
696        crate::bail_invalid_basis!(
697            "unsupported Bessel-K order ν={nu_abs}; only integer/half-integer orders are supported"
698        );
699    }
700    let two_nu_i = two_nu as i64;
701    if two_nu_i % 2 == 0 {
702        let n = (two_nu_i / 2).max(0) as usize;
703        Ok(bessel_k_integer_order(n, z))
704    } else {
705        let l = ((two_nu_i - 1) / 2).max(0) as usize;
706        Ok(bessel_k_half_integer_order(l, z))
707    }
708}
709
710/// Precomputed coefficient for `polyharmonic_kernel` that depends only on
711/// `m` and `k_dim`, not on `r`.  Avoids repeated gamma_lanczos calls in the
712/// hot kernel evaluation loop (called n × k times per basis build).
713#[derive(Clone, Copy)]
714pub(crate) struct PolyharmonicBlockCoeff {
715    pub(crate) c: f64,
716    pub(crate) power: f64,
717    pub(crate) is_log_case: bool,
718}
719
720impl PolyharmonicBlockCoeff {
721    pub(crate) fn new(m: f64, k_dim: usize) -> Self {
722        assert!(
723            m.is_finite() && m > 0.0,
724            "PolyharmonicBlockCoeff::new: m must be finite and > 0, got {m}"
725        );
726        let k_half = 0.5 * k_dim as f64;
727        let power = 2.0 * m - k_dim as f64;
728        // Log case: k_dim is even and `2m − k_dim` is a non-negative even
729        // integer (within ε). For fractional `m` this never fires; for
730        // integer `m` it matches the original integer modulo check exactly.
731        const LOG_EPS: f64 = 1e-12;
732        let two_m = 2.0 * m;
733        let is_log_case = k_dim.is_multiple_of(2) && {
734            let n_f = (power / 2.0).round();
735            n_f >= 0.0 && (n_f * 2.0 - power).abs() < LOG_EPS
736        };
737        if is_log_case {
738            let m_int = m.round() as i64;
739            let m_minus_half_d_plus_one = (m - k_half + 1.0).round() as i64;
740            let c = polyharmonic_log_sign(m_int as usize, k_dim)
741                / (2.0_f64.powi((two_m.round() as i32) - 1)
742                    * std::f64::consts::PI.powf(k_half)
743                    * gamma_lanczos(m)
744                    * gamma_lanczos(m_minus_half_d_plus_one as f64));
745            Self {
746                c,
747                power,
748                is_log_case: true,
749            }
750        } else {
751            let c = gamma_lanczos(k_half - m)
752                / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m));
753            Self {
754                c,
755                power,
756                is_log_case: false,
757            }
758        }
759    }
760
761    #[inline(always)]
762    pub(crate) fn eval(&self, r: f64) -> f64 {
763        if r <= 0.0 {
764            return self.origin_limit();
765        }
766        if self.is_log_case {
767            self.c * r.powf(self.power) * r.max(1e-300).ln()
768        } else {
769            self.c * r.powf(self.power)
770        }
771    }
772
773    #[inline(always)]
774    pub(crate) fn origin_limit(&self) -> f64 {
775        if self.is_log_case {
776            log_power_origin_limit(self.c, self.power, 1.0, 0.0)
777        } else {
778            log_power_origin_limit(self.c, self.power, 0.0, 1.0)
779        }
780    }
781}
782
783pub(crate) fn polyharmonic_kernel(r: f64, m: f64, k_dim: usize) -> f64 {
784    PolyharmonicBlockCoeff::new(m, k_dim).eval(r)
785}
786
787#[inline(always)]
788pub(crate) fn signed_infinity(sign: f64) -> f64 {
789    if sign.is_sign_negative() {
790        f64::NEG_INFINITY
791    } else {
792        f64::INFINITY
793    }
794}
795
796#[inline(always)]
797pub(crate) fn log_power_origin_limit(
798    coeff: f64,
799    exponent: f64,
800    log_coeff: f64,
801    pure_coeff: f64,
802) -> f64 {
803    if log_coeff == 0.0 && pure_coeff == 0.0 {
804        return 0.0;
805    }
806    if exponent > 0.0 {
807        return 0.0;
808    }
809    if exponent == 0.0 {
810        if log_coeff != 0.0 {
811            signed_infinity(-coeff * log_coeff)
812        } else {
813            coeff * pure_coeff
814        }
815    } else if log_coeff != 0.0 {
816        signed_infinity(-coeff * log_coeff)
817    } else {
818        signed_infinity(coeff * pure_coeff)
819    }
820}
821
822#[inline(always)]
823pub(crate) fn polyharmonic_log_sign(m: usize, k_dim: usize) -> f64 {
824    assert!(
825        k_dim.is_multiple_of(2),
826        "polyharmonic_log_sign requires even kernel dimension: k_dim={k_dim}, m={m}"
827    );
828    (-1.0_f64).powi(m as i32 - (k_dim as i32 / 2) + 1)
829}
830
831#[inline(always)]
832pub(crate) fn duchon_matern_block(
833    r: f64,
834    kappa: f64,
835    n_order: usize,
836    k_dim: usize,
837) -> Result<f64, BasisError> {
838    let n = n_order as f64;
839    let k_half = 0.5 * k_dim as f64;
840    let nu = n - k_half;
841    let nu_abs = nu.abs();
842    let c = kappa.powf(k_half - n)
843        / ((2.0 * std::f64::consts::PI).powf(k_half) * 2.0_f64.powf(n - 1.0) * gamma_lanczos(n));
844    if r <= 0.0 {
845        if nu > 0.0 {
846            // r^ν K_ν(κr) → 2^(ν−1) Γ(ν) κ^(−ν) as r→0+.
847            return Ok(c * 2.0_f64.powf(nu - 1.0) * gamma_lanczos(nu) * kappa.powf(-nu));
848        }
849        // ν ≤ 0: c·r^ν·K_|ν|(κr) is divergent at r=0 (logarithmically for ν=0,
850        // power-law for ν<0). The hybrid-kernel diagonal must be evaluated via
851        // duchon_hybrid_kernel_collision_value, which sums the divergent
852        // Matérn and polyharmonic blocks so the singularities cancel exactly
853        // (guaranteed by the PFD identity when 2(p+s) > d).
854        crate::bail_invalid_basis!(
855            "Duchon Matérn block at r=0 with ν={nu} ≤ 0 is divergent; \
856             evaluate the hybrid kernel diagonal via the collision routine"
857        );
858    }
859    let z = (kappa * r).max(1e-300);
860    let k_nu = bessel_k_real_half_integer_or_integer(nu_abs, z)?;
861    Ok(c * r.powf(nu) * k_nu)
862}
863
864#[inline(always)]
865pub(crate) fn polyharmonic_kernel_triplet(
866    r: f64,
867    m: f64,
868    k_dim: usize,
869) -> Result<(f64, f64, f64), BasisError> {
870    let (value, first, second, _, _) = polyharmonic_block_jet4(r, m, k_dim)?;
871    Ok((value, first, second))
872}
873
874#[inline(always)]
875pub(crate) fn falling_factorial(alpha: f64, order: usize) -> f64 {
876    (0..order).fold(1.0, |acc, idx| acc * (alpha - idx as f64))
877}
878
879#[inline(always)]
880pub(crate) fn falling_factorial_derivative(alpha: f64, order: usize) -> f64 {
881    if order == 0 {
882        return 0.0;
883    }
884    let mut total = 0.0;
885    for omit in 0..order {
886        let mut term = 1.0;
887        for idx in 0..order {
888            if idx != omit {
889                term *= alpha - idx as f64;
890            }
891        }
892        total += term;
893    }
894    total
895}
896
897/// Unified radial jet for one polyharmonic partial-fraction block.
898///
899/// Returns (φ, φ', φ'', φ''', φ'''') from a single consistent evaluation,
900/// sharing normalization constant, r_safe, and log_r. This eliminates the
901/// possibility of numerical drift between the triplet and higher-order
902/// derivative paths.
903pub(crate) fn polyharmonic_block_jet4(
904    r: f64,
905    m: f64,
906    k_dim: usize,
907) -> Result<(f64, f64, f64, f64, f64), BasisError> {
908    if !r.is_finite() || r < 0.0 {
909        crate::bail_invalid_basis!("polyharmonic distance must be finite and non-negative");
910    }
911    assert!(
912        m.is_finite() && m > 0.0,
913        "polyharmonic_block_jet4: m must be finite and > 0, got {m}"
914    );
915
916    let k_half = 0.5 * k_dim as f64;
917    let alpha = 2.0 * m - k_dim as f64;
918    // Log case: k_dim even and `2m − k_dim` is a non-negative even integer
919    // (within ε). For fractional `m` this never fires.
920    const LOG_EPS: f64 = 1e-12;
921    let is_log_case = k_dim.is_multiple_of(2) && {
922        let n_f = (alpha / 2.0).round();
923        n_f >= 0.0 && (n_f * 2.0 - alpha).abs() < LOG_EPS
924    };
925    if is_log_case {
926        let m_int = m.round() as usize;
927        let c = polyharmonic_log_sign(m_int, k_dim)
928            / (2.0_f64.powi((2 * m_int - 1) as i32)
929                * std::f64::consts::PI.powf(k_half)
930                * gamma_lanczos(m)
931                * gamma_lanczos((m_int - k_dim / 2 + 1) as f64));
932        let mut out = [0.0; 5];
933        for d in 0..5 {
934            let e = alpha - d as f64;
935            let ff = falling_factorial(alpha, d);
936            let ff_d = falling_factorial_derivative(alpha, d);
937            out[d] = if r <= 0.0 {
938                log_power_origin_limit(c, e, ff, ff_d)
939            } else {
940                c * r.powf(e) * (ff * r.ln() + ff_d)
941            };
942        }
943        return Ok((out[0], out[1], out[2], out[3], out[4]));
944    }
945
946    let c = gamma_lanczos(k_half - m)
947        / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m));
948    let mut out = [0.0; 5];
949    for d in 0..5 {
950        let e = alpha - d as f64;
951        let ff = falling_factorial(alpha, d);
952        out[d] = if r <= 0.0 {
953            log_power_origin_limit(c, e, 0.0, ff)
954        } else {
955            c * ff * r.powf(e)
956        };
957    }
958    Ok((out[0], out[1], out[2], out[3], out[4]))
959}
960
961#[inline(always)]
962pub(crate) fn log_power_family_derivative(
963    exponent: f64,
964    log_coeff: f64,
965    pure_coeff: f64,
966) -> (f64, f64, f64) {
967    (
968        exponent - 1.0,
969        exponent * log_coeff,
970        exponent * pure_coeff + log_coeff,
971    )
972}
973
974#[inline(always)]
975pub(crate) fn log_power_family_value(
976    r: f64,
977    coeff: f64,
978    exponent: f64,
979    log_coeff: f64,
980    pure_coeff: f64,
981) -> f64 {
982    if r <= 0.0 {
983        log_power_origin_limit(coeff, exponent, log_coeff, pure_coeff)
984    } else {
985        coeff * r.powf(exponent) * (log_coeff * r.ln() + pure_coeff)
986    }
987}
988
989#[inline(always)]
990pub(crate) fn duchon_polyharmonic_operator_block_jets(
991    r: f64,
992    m: f64,
993    k_dim: usize,
994) -> Result<(f64, f64, f64, f64), BasisError> {
995    if !r.is_finite() || r < 0.0 {
996        crate::bail_invalid_basis!("polyharmonic distance must be finite and non-negative");
997    }
998    assert!(
999        m.is_finite() && m > 0.0,
1000        "duchon_polyharmonic_operator_block_jets: m must be finite and > 0, got {m}"
1001    );
1002
1003    let k_half = 0.5 * k_dim as f64;
1004    let alpha = 2.0 * m - k_dim as f64;
1005    // Log case: k_dim even and `2m − k_dim` is a non-negative even integer
1006    // (within ε). For fractional `m` this never fires; for integer `m` it
1007    // matches the original `k_dim % 2 == 0 && m >= k_dim / 2` check.
1008    const LOG_EPS: f64 = 1e-12;
1009    let is_log_case = k_dim.is_multiple_of(2) && {
1010        let n_f = (alpha / 2.0).round();
1011        n_f >= 0.0 && (n_f * 2.0 - alpha).abs() < LOG_EPS
1012    };
1013    let (c, phi_log_coeff, phi_pure_coeff) = if is_log_case {
1014        let m_int = m.round() as usize;
1015        (
1016            polyharmonic_log_sign(m_int, k_dim)
1017                / (2.0_f64.powi((2 * m_int - 1) as i32)
1018                    * std::f64::consts::PI.powf(k_half)
1019                    * gamma_lanczos(m)
1020                    * gamma_lanczos((m_int - k_dim / 2 + 1) as f64)),
1021            1.0,
1022            0.0,
1023        )
1024    } else {
1025        (
1026            gamma_lanczos(k_half - m)
1027                / (4.0_f64.powf(m) * std::f64::consts::PI.powf(k_half) * gamma_lanczos(m)),
1028            0.0,
1029            1.0,
1030        )
1031    };
1032
1033    let (phi_r_exp, phi_r_log, phi_r_pure) =
1034        log_power_family_derivative(alpha, phi_log_coeff, phi_pure_coeff);
1035    let q_exp = phi_r_exp - 1.0;
1036    let q = log_power_family_value(r, c, q_exp, phi_r_log, phi_r_pure);
1037
1038    let (q_r_exp_raw, q_r_log, q_r_pure) =
1039        log_power_family_derivative(q_exp, phi_r_log, phi_r_pure);
1040    let t_exp = q_r_exp_raw - 1.0;
1041    let t = log_power_family_value(r, c, t_exp, q_r_log, q_r_pure);
1042
1043    let (t_r_exp, t_r_log, t_r_pure) = log_power_family_derivative(t_exp, q_r_log, q_r_pure);
1044    let t_r = log_power_family_value(r, c, t_r_exp, t_r_log, t_r_pure);
1045
1046    let (t_rr_exp, t_rr_log, t_rr_pure) = log_power_family_derivative(t_r_exp, t_r_log, t_r_pure);
1047    let t_rr = log_power_family_value(r, c, t_rr_exp, t_rr_log, t_rr_pure);
1048
1049    Ok((q, t, t_r, t_rr))
1050}
1051
1052/// Shared Bessel-K ladder for one evaluation point `z = κ·r`.
1053///
1054/// Every Matérn partial-fraction block and every term of its radial
1055/// derivative lattice consumes `K_ν(z)` at orders from ONE parity class
1056/// (integer when the covariate dimension is even, half-integer when odd),
1057/// differing by integers — and all at the SAME `z`. The previous code
1058/// restarted the `K₀/K₁` (or closed-form half-integer) seed evaluation and
1059/// the upward recurrence inside every per-term Bessel call: hundreds of
1060/// redundant seed+recurrence runs per `(row, center)` pair, which the #979
1061/// CTN stage-1 stack profile showed to be the dominant cost of every Duchon
1062/// κ-trial at scale. One ladder per point replaces all of them: two seed
1063/// evaluations plus the standard upward recurrence
1064/// `K_{ν+1}(z) = K_{ν−1}(z) + (2ν/z)·K_ν(z)`, which is the numerically
1065/// STABLE direction for `K` (it grows with ν). For integer orders this is
1066/// arithmetic-identical to the old per-call `bessel_k_integer_order`, which
1067/// ran the same seeds and recurrence internally; for half-integer orders the
1068/// recurrence is exact and replaces the per-order closed-form sum.
1069pub(crate) struct BesselKLadder {
1070    /// `values[i] = K_{base + i}(z)` with `base ∈ {0, ½}`.
1071    pub(crate) values: SmallVec<[f64; 16]>,
1072    pub(crate) half_integer: bool,
1073}
1074
1075impl BesselKLadder {
1076    pub(crate) fn build(z: f64, half_integer: bool, max_order_steps: usize) -> Self {
1077        let zz = z.max(1e-300);
1078        let mut values: SmallVec<[f64; 16]> = SmallVec::with_capacity(max_order_steps + 2);
1079        if half_integer {
1080            // K_{1/2}(z) = √(π/(2z))·e^{−z};  K_{3/2}(z) = K_{1/2}(z)·(1 + 1/z).
1081            let k_half = (std::f64::consts::PI / (2.0 * zz)).sqrt() * (-zz).exp();
1082            values.push(k_half);
1083            values.push(k_half * (1.0 + 1.0 / zz));
1084        } else {
1085            values.push(bessel_k0_stable(zz));
1086            values.push(bessel_k1_stable(zz));
1087        }
1088        let base = if half_integer { 0.5 } else { 0.0 };
1089        for i in 1..max_order_steps {
1090            let nu = base + i as f64;
1091            let next = values[i - 1] + 2.0 * nu * values[i] / zz;
1092            values.push(next);
1093        }
1094        Self {
1095            values,
1096            half_integer,
1097        }
1098    }
1099
1100    /// `K_{|order|}(z)` from the ladder (`K_{−ν} = K_ν`).
1101    #[inline]
1102    pub(crate) fn k_abs(&self, order_abs: f64) -> f64 {
1103        let base = if self.half_integer { 0.5 } else { 0.0 };
1104        let idx = (order_abs - base).round() as usize;
1105        self.values[idx]
1106    }
1107}
1108
1109/// Radial-derivative jets of the Matérn family `coeff·r^μ·K_μ(κr)` up to
1110/// order `max_j ≤ 4`, evaluated against a shared [`BesselKLadder`].
1111///
1112/// Exact recurrence derived from `d/dr[r^ν K_ν(κr)]` and the Bessel identity
1113/// `dK_ν/dz = −K_{ν−1}(z) − (ν/z)K_ν(z)`:
1114///
1115///   g⁽⁰⁾ = c · r^ν · K_ν(z)
1116///   g⁽¹⁾ = −c · κ · r^ν · K_{ν−1}(z)
1117///   g⁽²⁾ = c·κ² r^ν K_{ν−2} − c·κ r^{ν−1} K_{ν−1}, ...
1118///
1119/// Same derivative lattice as the per-order reference implementation
1120/// `duchon_matern_family_radial_derivative_reference` (kept in the test
1121/// module as the equivalence oracle)
1122/// (term-for-term, in the same order), but: (a) the lattice is expanded
1123/// incrementally once instead of rebuilt from scratch per derivative order,
1124/// (b) terms live in a fixed-capacity stack buffer instead of per-call heap
1125/// `Vec`s (≤ 2^max_j ≤ 16 terms), and (c) every Bessel factor is an indexed
1126/// ladder read instead of a fresh seed+recurrence evaluation. Only orders
1127/// `0..=max_j` are computed — the q-family consumes order 0 only and the
1128/// t-family orders ≤ 2, where the old path always expanded to order 4 and
1129/// discarded the tail.
1130pub(crate) fn duchon_matern_family_jets_with_ladder(
1131    r: f64,
1132    kappa: f64,
1133    coeff: f64,
1134    mu: f64,
1135    max_j: usize,
1136    ladder: &BesselKLadder,
1137    out: &mut [f64],
1138) -> Result<(), BasisError> {
1139    if max_j > 4 || out.len() <= max_j {
1140        crate::bail_invalid_basis!(
1141            "Duchon Matérn-family ladder jets support derivative orders 0..=4 with an output slot per order"
1142        );
1143    }
1144    if r <= 0.0 {
1145        out[..=max_j].fill(0.0);
1146        if mu > 0.0 {
1147            out[0] = coeff * 2.0_f64.powf(mu - 1.0) * gamma_lanczos(mu) * kappa.powf(-mu);
1148        }
1149        return Ok(());
1150    }
1151    let mut terms: SmallVec<[DuchonMaternDerivativeTerm; 16]> =
1152        smallvec![DuchonMaternDerivativeTerm {
1153            coeff,
1154            kappa_power: 0,
1155            r_power: mu,
1156            bessel_order: mu,
1157        }];
1158    for (j, slot) in out.iter_mut().enumerate().take(max_j + 1) {
1159        if j > 0 {
1160            let mut next: SmallVec<[DuchonMaternDerivativeTerm; 16]> =
1161                SmallVec::with_capacity(terms.len() * 2);
1162            for term in &terms {
1163                let stay_coeff = term.coeff * (term.r_power - term.bessel_order);
1164                if stay_coeff != 0.0 {
1165                    next.push(DuchonMaternDerivativeTerm {
1166                        coeff: stay_coeff,
1167                        kappa_power: term.kappa_power,
1168                        r_power: term.r_power - 1.0,
1169                        bessel_order: term.bessel_order,
1170                    });
1171                }
1172                next.push(DuchonMaternDerivativeTerm {
1173                    coeff: -term.coeff,
1174                    kappa_power: term.kappa_power + 1,
1175                    r_power: term.r_power,
1176                    bessel_order: term.bessel_order - 1.0,
1177                });
1178            }
1179            terms = next;
1180        }
1181        let mut value = KahanSum::default();
1182        for term in &terms {
1183            if term.coeff == 0.0 {
1184                continue;
1185            }
1186            value.add(
1187                term.coeff
1188                    * kappa.powi(term.kappa_power as i32)
1189                    * r.powf(term.r_power)
1190                    * ladder.k_abs(term.bessel_order.abs()),
1191            );
1192        }
1193        *slot = value.sum();
1194    }
1195    Ok(())
1196}
1197
1198/// Maximum ladder steps (`K_base ..= K_{base+steps}`) needed by the q/t
1199/// operator families of Matérn block `n` in dimension `k_dim`: the q-family
1200/// reads `K_{|ν−1|}` and the t-family `K_{|ν−2−j|}` for `j ≤ 2`, ν = n − d/2.
1201pub(crate) fn duchon_matern_block_max_ladder_steps(n_order: usize, k_dim: usize) -> usize {
1202    let nu = n_order as f64 - 0.5 * k_dim as f64;
1203    let candidates = [
1204        (nu - 1.0).abs(),
1205        (nu - 2.0).abs(),
1206        (nu - 3.0).abs(),
1207        (nu - 4.0).abs(),
1208    ];
1209    let max_abs = candidates.iter().copied().fold(0.0_f64, f64::max);
1210    max_abs.floor() as usize + 1
1211}
1212
1213pub(crate) fn duchon_matern_operator_block_jets_with_ladder(
1214    r: f64,
1215    kappa: f64,
1216    n_order: usize,
1217    k_dim: usize,
1218    ladder: &BesselKLadder,
1219) -> Result<(f64, f64, f64, f64), BasisError> {
1220    if r <= 0.0 {
1221        return Ok((0.0, 0.0, 0.0, 0.0));
1222    }
1223    let n = n_order as f64;
1224    let k_half = 0.5 * k_dim as f64;
1225    let nu = n - k_half;
1226    let c = kappa.powf(k_half - n)
1227        / ((2.0 * std::f64::consts::PI).powf(k_half) * 2.0_f64.powf(n - 1.0) * gamma_lanczos(n));
1228
1229    let mut q_out = [0.0_f64; 1];
1230    duchon_matern_family_jets_with_ladder(r, kappa, -c * kappa, nu - 1.0, 0, ladder, &mut q_out)?;
1231    let mut t_out = [0.0_f64; 3];
1232    duchon_matern_family_jets_with_ladder(
1233        r,
1234        kappa,
1235        c * kappa * kappa,
1236        nu - 2.0,
1237        2,
1238        ladder,
1239        &mut t_out,
1240    )?;
1241    Ok((q_out[0], t_out[0], t_out[1], t_out[2]))
1242}
1243
1244#[inline(always)]
1245pub(crate) fn pure_duchon_block_order(p_order: usize, s_order: f64) -> f64 {
1246    p_order as f64 + s_order
1247}
1248
1249pub(crate) fn validate_duchon_kernel_orders(
1250    length_scale: Option<f64>,
1251    p_order: usize,
1252    s_order: f64,
1253    k_dim: usize,
1254) -> Result<(), BasisError> {
1255    if k_dim == 0 {
1256        crate::bail_invalid_basis!("Duchon basis requires at least one covariate dimension");
1257    }
1258    if let Some(scale) = length_scale
1259        && (!scale.is_finite() || scale <= 0.0)
1260    {
1261        crate::bail_invalid_basis!("Duchon hybrid length_scale must be finite and positive");
1262    }
1263    // Two independent well-posedness conditions on (p, s, d) for pure Duchon.
1264    //
1265    // (1) CPD-vs-nullspace adequacy — gated below on `length_scale.is_none()`.
1266    //     The pure-polyharmonic kernel of effective order m = p+s in R^d is
1267    //     phi(r) = r^{2m-d}, or r^{2m-d}·log r when 2m-d is a non-negative
1268    //     even integer (the "log case", reached precisely when d is even
1269    //     and m >= d/2). Wendland's Theorem 8.17 / 8.18 give its
1270    //     conditional-positive-definiteness order:
1271    //
1272    //         d odd,  exponent half-integer:  ceil((2m-d)/2) = m - (d-1)/2
1273    //         d even, log case:               (2m-d)/2 + 1   = m - d/2 + 1
1274    //
1275    //     Duchon interpolation with polynomial nullspace P_p (polynomials
1276    //     of degree < p) is uniquely solvable iff the kernel's CPD order
1277    //     does not exceed p. Substituting m = p + s:
1278    //
1279    //         d odd:  s <= (d-1)/2     <=>  2s <= d - 1
1280    //         d even: s <= d/2 - 1     <=>  2s <= d - 2
1281    //
1282    //     Both branches collapse to `2s < d` once we use that s and d are
1283    //     integers and 2s is therefore even (so `2s = d - 1` is impossible
1284    //     for even d, and `2s <= d - 2` is just `2s < d`).
1285    //
1286    //     Counter-example admitted if this guard is dropped: d=2, p=1, s=1
1287    //     passes the spectral check (2(1+1)=4 > 2) and builds the TPS
1288    //     kernel c·r²·log r against a constants-only nullspace P_1; the
1289    //     interpolation form is not PD on lambda perp P_1 and the fitted
1290    //     penalty is meaningless.
1291    //
1292    //     The hybrid (Matérn-blended) Duchon kernel sidesteps this entirely:
1293    //     the Matérn remainder is strictly positive definite (CPD order 0),
1294    //     so any P_p suffices — hence the `length_scale.is_none()` gate.
1295    //
1296    // (2) Spectral kernel-existence — universal, gated below on the sum.
1297    //     The pointwise kernel comes from the inverse Fourier of
1298    //     1/|xi|^{2(p+s)}, which is a finite distribution at the origin
1299    //     iff `2(p+s) > d`. Below that threshold the radial kernel value
1300    //     diverges and there is nothing to evaluate.
1301    if !s_order.is_finite() || s_order < 0.0 {
1302        crate::bail_invalid_basis!("Duchon spectral power must be finite and ≥ 0; got s={s_order}");
1303    }
1304    if length_scale.is_none() && p_order < 2 && 2.0 * s_order >= k_dim as f64 {
1305        crate::bail_invalid_basis!(
1306            "pure Duchon requires power < dimension/2 for nullspace degree < {p_order}; got power={s_order}, dimension={k_dim}"
1307        );
1308    }
1309    let spectral_order = 2.0 * (p_order as f64 + s_order);
1310    if spectral_order <= k_dim as f64 {
1311        crate::bail_invalid_basis!(
1312            "Duchon pointwise kernel values require 2*(p+s) > dimension; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1313        );
1314    }
1315    Ok(())
1316}
1317
1318pub(crate) fn validate_duchon_collocation_orders(
1319    length_scale: Option<f64>,
1320    p_order: usize,
1321    s_order: f64,
1322    k_dim: usize,
1323    max_operator_derivative_order: usize,
1324) -> Result<(), BasisError> {
1325    // Kernel-level conditions (existence + CPD/nullspace adequacy) come first;
1326    // the operator-level conditions below build on a pointwise-valid kernel.
1327    validate_duchon_kernel_orders(length_scale, p_order, s_order, k_dim)?;
1328    // The spectral_order > k_dim + k checks below are C^k-at-origin
1329    // conditions: for the polyharmonic kernel r^{2(p+s)-d} (or the log
1330    // variant) to admit k-th radial derivatives in the distributional sense
1331    // — and therefore for k-th-order derivative *collocation* of the
1332    // kernel against centers to produce a finite operator — we need its
1333    // exponent to clear the next k orders of differentiation at the
1334    // origin. Equivalently: 2(p+s) - d > k.
1335    //
1336    // Note these are independent of the CPD/nullspace check. The penalty
1337    // matrices ultimately built from these collocation operators are of
1338    // the form S = D_k^T D_k and are PSD by construction; the discipline
1339    // here is purely about *existence* of D_k itself.
1340    let spectral_order = 2.0 * (p_order as f64 + s_order);
1341    if max_operator_derivative_order >= 1 && spectral_order <= k_dim as f64 + 1.0 {
1342        crate::bail_invalid_basis!(
1343            "Duchon D1 collocation requires 2*(p+s) > dimension+1; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1344        );
1345    }
1346    if max_operator_derivative_order >= 2 && spectral_order <= k_dim as f64 + 2.0 {
1347        crate::bail_invalid_basis!(
1348            "Duchon D2 collocation requires 2*(p+s) > dimension+2; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1349        );
1350    }
1351    Ok(())
1352}
1353
1354#[derive(Debug, Clone)]
1355pub struct DuchonPartialFractionCoeffs {
1356    pub(crate) a: Vec<f64>,
1357    pub(crate) b: Vec<f64>,
1358}
1359
1360#[inline(always)]
1361pub(crate) fn duchon_partial_fraction_coeffs(
1362    p_order: usize,
1363    s_order: usize,
1364    kappa: f64,
1365) -> DuchonPartialFractionCoeffs {
1366    // 1/(ρ^{2p}(κ²+ρ²)^s) = Σ a_m/ρ^{2m} + Σ b_n/(κ²+ρ²)^n
1367    let mut a = vec![0.0_f64; p_order + 1]; // 1-based m
1368    let mut b = vec![0.0_f64; s_order + 1]; // 1-based n
1369    if s_order == 0 {
1370        if p_order > 0 {
1371            // Pure intrinsic polyharmonic case: no Matérn tail remains, so the
1372            // spectrum is exactly 1 / ρ^(2p).
1373            a[p_order] = 1.0;
1374        }
1375        return DuchonPartialFractionCoeffs { a, b };
1376    }
1377    for m in 1..=p_order {
1378        let sign = if (p_order - m).is_multiple_of(2) {
1379            1.0
1380        } else {
1381            -1.0
1382        };
1383        let expo = -2.0 * (s_order + p_order - m) as f64;
1384        let comb = binomial_f64(s_order + p_order - m - 1, p_order - m);
1385        a[m] = sign * kappa.powf(expo) * comb;
1386    }
1387    for n in 1..=s_order {
1388        let sign = if p_order.is_multiple_of(2) { 1.0 } else { -1.0 };
1389        let expo = -2.0 * (p_order + s_order - n) as f64;
1390        let comb = if p_order == 0 && n == s_order {
1391            // p=0 reduces to the pure Matérn block 1/(κ²+ρ²)^s.
1392            1.0
1393        } else {
1394            let top = p_order + s_order - n - 1;
1395            binomial_f64(top, s_order - n)
1396        };
1397        b[n] = sign * kappa.powf(expo) * comb;
1398    }
1399    DuchonPartialFractionCoeffs { a, b }
1400}
1401
1402/// 64-node Gauss–Legendre rule on `[0, 1]` (nodes already mapped from the
1403/// canonical `[-1, 1]` interval, weights scaled by the `1/2` Jacobian).
1404///
1405/// Used by [`duchon_hybrid_kernel_stable_integral`] to evaluate the hybrid
1406/// Duchon–Matérn kernel without the catastrophically-cancelling
1407/// partial-fraction sum (gam#1424). The integrand is smooth and strictly
1408/// positive on `(0, 1)`, so a fixed high-order rule reproduces the kernel to
1409/// ~1e-15 relative accuracy across all reachable high-dimensional orders.
1410fn gauss_legendre_01_64() -> &'static [(f64, f64)] {
1411    use std::sync::OnceLock;
1412    static NODES: OnceLock<Vec<(f64, f64)>> = OnceLock::new();
1413    NODES.get_or_init(|| {
1414        // Newton iteration on the Legendre polynomial roots (the classic
1415        // `gauleg` recipe). The N-point rule is symmetric about the midpoint, so
1416        // only the lower half of the roots is solved for and the rule is
1417        // mirrored. Computed once; converges to full f64 precision in a handful
1418        // of Newton steps per root.
1419        const N: usize = 64;
1420        let nf = N as f64;
1421        let mut nodes: Vec<(f64, f64)> = Vec::with_capacity(N);
1422        let half = N.div_ceil(2);
1423        for i in 0..half {
1424            // Initial guess for the i-th root on [-1, 1] (Chebyshev-like).
1425            let mut x = (std::f64::consts::PI * (i as f64 + 0.75) / (nf + 0.5)).cos();
1426            let mut dp = 0.0_f64;
1427            for _ in 0..100 {
1428                // Evaluate the Legendre polynomial P_N(x) and derivative P_N'(x)
1429                // via the three-term recurrence.
1430                let mut p0 = 1.0_f64;
1431                let mut p1 = x;
1432                for k in 2..=N {
1433                    let kf = k as f64;
1434                    let p2 = ((2.0 * kf - 1.0) * x * p1 - (kf - 1.0) * p0) / kf;
1435                    p0 = p1;
1436                    p1 = p2;
1437                }
1438                // P_N'(x) = N (x P_N(x) − P_{N−1}(x)) / (x² − 1).
1439                dp = nf * (x * p1 - p0) / (x * x - 1.0);
1440                let dx = p1 / dp;
1441                x -= dx;
1442                if dx.abs() <= 1e-16 * x.abs().max(1.0) {
1443                    break;
1444                }
1445            }
1446            // Gauss–Legendre weight: 2 / ((1 − x²) P_N'(x)²).
1447            let w = 2.0 / ((1.0 - x * x) * dp * dp);
1448            // x is the i-th root counting inward from +1; mirror to −x.
1449            nodes.push((x, w));
1450            if x.abs() > 1e-300 {
1451                nodes.push((-x, w));
1452            }
1453        }
1454        // Sort by node, then map [-1, 1] -> [0, 1] with the 1/2 Jacobian.
1455        nodes.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
1456        nodes
1457            .into_iter()
1458            .map(|(x, w)| (0.5 * (x + 1.0), 0.5 * w))
1459            .collect()
1460    })
1461}
1462
1463/// Evaluate the hybrid Duchon–Matérn kernel
1464/// `φ(r) = F^{-1}[ ρ^{-2p} (κ²+ρ²)^{-s} ](r)` via a single, cancellation-free
1465/// 1-D integral (gam#1424).
1466///
1467/// The partial-fraction expansion `Σ a_m/ρ^{2m} + Σ b_n/(κ²+ρ²)^n` evaluates
1468/// the radial kernel as an alternating sum of individually huge polyharmonic
1469/// (`r^{2m-d}`) and Matérn blocks whose leading singular parts cancel. For
1470/// high `d` (e.g. d=16, p=2, s=7) the largest block is ~1e3 while the true
1471/// value is ~1e-13, so f64 loses *every* significant digit and the resulting
1472/// Gram matrix is no longer PSD (λ_min ≈ −0.26 after normalization).
1473///
1474/// Using the Schwinger / Feynman parametrization of both rational factors and
1475/// performing the Gaussian (radial inverse-FT) integral analytically reduces
1476/// the kernel to
1477///
1478/// ```text
1479///   φ(r) = (4π)^{-d/2} / (Γ(p)Γ(s))
1480///          · ∫₀¹ (1-w)^{p-1} w^{s-1} · 2(B/A)^{b/2} K_b(2√(AB)) dw,
1481///   with  b = p + s − d/2,  A = w κ²,  B = r²/4.
1482/// ```
1483///
1484/// The integrand is smooth and strictly positive on `(0, 1)` (no cancellation),
1485/// so a fixed 64-point Gauss–Legendre rule is accurate to ~1e-15 relative.
1486/// The `r = 0` diagonal has the closed form
1487/// `φ(0) = (4π)^{-d/2} Γ(b)/(Γ(p)Γ(s)) κ^{-2b} B(s−b, p)`.
1488///
1489/// Requires `b = p + s − d/2 > 0` (kernel existence, `2(p+s) > d`) and
1490/// `s − b = d/2 − p > 0` (integrable `w → 0` endpoint), i.e. `2p < d`. Callers
1491/// must check [`duchon_hybrid_stable_integral_applies`] before invoking.
1492pub(crate) fn duchon_hybrid_kernel_stable_integral(
1493    r: f64,
1494    kappa: f64,
1495    p_order: usize,
1496    s_order: usize,
1497    k_dim: usize,
1498) -> Result<f64, BasisError> {
1499    assert!(
1500        duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim),
1501        "duchon_hybrid_kernel_stable_integral precondition violated: 2(p+s) > d and 2p < d required (p={p_order}, s={s_order}, d={k_dim})"
1502    );
1503    let p = p_order as f64;
1504    let s = s_order as f64;
1505    let half_d = 0.5 * k_dim as f64;
1506    let b = p + s - half_d;
1507    let pref = (4.0 * std::f64::consts::PI).powf(-half_d) / (gamma_lanczos(p) * gamma_lanczos(s));
1508    if r == 0.0 {
1509        // φ(0) = pref · Γ(b) · κ^{-2b} · B(s−b, p),  B(x,y)=Γ(x)Γ(y)/Γ(x+y).
1510        let beta = gamma_lanczos(s - b) * gamma_lanczos(p) / gamma_lanczos(s - b + p);
1511        let value = pref * gamma_lanczos(b) * kappa.powf(-2.0 * b) * beta;
1512        if !value.is_finite() {
1513            crate::bail_invalid_basis!(
1514                "non-finite Duchon hybrid diagonal (stable form) for p={p_order}, s={s_order}, d={k_dim}"
1515            );
1516        }
1517        return Ok(value);
1518    }
1519    let mut acc = KahanSum::default();
1520    for &(w, weight) in gauss_legendre_01_64() {
1521        // Smooth term  2(B/A)^{b/2} K_b(2√(AB)) = 2 (r/(2κ√w))^b K_b(κ r √w).
1522        let sqrt_w = w.sqrt();
1523        let z = (kappa * r * sqrt_w).max(1e-300);
1524        let k_b = bessel_k_real_half_integer_or_integer(b.abs(), z)?;
1525        let smooth = 2.0 * (r / (2.0 * kappa * sqrt_w)).powf(b) * k_b;
1526        let factor = (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * smooth;
1527        acc.add(weight * factor);
1528    }
1529    let value = pref * acc.sum();
1530    if !value.is_finite() {
1531        crate::bail_invalid_basis!(
1532            "non-finite Duchon hybrid value (stable form) at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1533        );
1534    }
1535    Ok(value)
1536}
1537
1538/// Radial operator scalars `(q, t, t_r, t_rr)` of the hybrid Duchon–Matérn
1539/// kernel via the same cancellation-free single integral as
1540/// [`duchon_hybrid_kernel_stable_integral`], differentiated under the integral
1541/// sign (gam#1424 / gam#1453).
1542///
1543/// The partial-fraction operator core (`duchon_regularized_operator_core`)
1544/// assembles `q, t` as a sign-alternating sum of polyharmonic and Matérn
1545/// *operator* blocks. In high dimensions (e.g. d=16, p=1, s=9) each block is
1546/// ~1e3 while the true operator scalar is ~1e-13, so f64 loses every
1547/// significant digit — Kahan summation fixes accumulation, not the
1548/// cancellation between huge opposing terms, leaving `q, t` with ~1e-2 relative
1549/// noise. That floor sits above the Chebyshev profile certificate, so the
1550/// production profile cannot certify (gam#1453).
1551///
1552/// This routine instead differentiates the smooth per-`w` integrand
1553/// `g(r,w) = 2 (r/(2c))^b K_b(c r)`, `c = κ√w`, in `r`. Each `w`-slice is a
1554/// single well-conditioned `r^a K_ν(c r)` term whose `r`-derivatives are exact
1555/// (`d/dr[r^a K_ν(c r)] = a r^{a-1} K_ν(c r) − (c/2) r^a (K_{ν-1}+K_{ν+1})`),
1556/// so there is no cross-block cancellation. The radial derivatives `φ′…φ⁗`
1557/// are integrated against the same `(1-w)^{p-1} w^{s-1}` weight and the
1558/// 64-node Gauss–Legendre rule, then the operator scalars are assembled from
1559/// the standard radial relations
1560/// `q = φ′/r`, `t = q′/r`, `t_r = (q″−t)/r`, `t_rr = q‴/r − 2q″/r² + 2q′/r³`.
1561///
1562/// Requires the same precondition as the kernel form
1563/// ([`duchon_hybrid_stable_integral_applies`]) and `r > 0`.
1564pub(crate) fn duchon_hybrid_operator_stable_integral(
1565    r: f64,
1566    kappa: f64,
1567    p_order: usize,
1568    s_order: usize,
1569    k_dim: usize,
1570) -> Result<DuchonRegularizedOperatorCore, BasisError> {
1571    assert!(
1572        duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim),
1573        "duchon_hybrid_operator_stable_integral precondition violated: 2(p+s) > d and 2p < d required (p={p_order}, s={s_order}, d={k_dim})"
1574    );
1575    assert!(
1576        r > 0.0 && r.is_finite(),
1577        "duchon_hybrid_operator_stable_integral requires r > 0, got r={r}"
1578    );
1579    let p = p_order as f64;
1580    let s = s_order as f64;
1581    let half_d = 0.5 * k_dim as f64;
1582    let b = p + s - half_d;
1583    let pref = (4.0 * std::f64::consts::PI).powf(-half_d) / (gamma_lanczos(p) * gamma_lanczos(s));
1584
1585    // Accumulate φ′, φ″, φ‴, φ⁗ across the Gauss–Legendre nodes. (φ itself is
1586    // not needed for the operator scalars.)
1587    let mut d1 = KahanSum::default();
1588    let mut d2 = KahanSum::default();
1589    let mut d3 = KahanSum::default();
1590    let mut d4 = KahanSum::default();
1591
1592    for &(w, weight) in gauss_legendre_01_64() {
1593        let sqrt_w = w.sqrt();
1594        let c = (kappa * sqrt_w).max(1e-300);
1595        let z = (c * r).max(1e-300);
1596
1597        // Smooth integrand g(r) = A · r^b · K_b(c r),  A = 2 (2c)^{-b}.
1598        // Differentiate the symbolic term list (coef, a, ν-offset) in r:
1599        //   d/dr[c0 r^a K_{b+j}(c r)]
1600        //     = c0·a · r^{a-1} K_{b+j}(c r)
1601        //       − c0·(c/2) · r^a (K_{b+j-1}(c r) + K_{b+j+1}(c r)).
1602        // Four derivatives need ν-offsets in [-4, 4] around b.
1603        let a0 = 2.0 * (2.0 * c).powf(-b);
1604        let mut terms: Vec<(f64, f64, i32)> = vec![(a0, b, 0)];
1605        // Cache K_{b+j}(z) for j ∈ [-4, 4] (K is even in order → use |·|).
1606        let bessel = |j: i32| -> Result<f64, BasisError> {
1607            bessel_k_real_half_integer_or_integer((b + j as f64).abs(), z)
1608        };
1609        let evaluate = |terms: &Vec<(f64, f64, i32)>| -> Result<f64, BasisError> {
1610            let mut acc = KahanSum::default();
1611            for &(c0, a, j) in terms {
1612                if c0 == 0.0 {
1613                    continue;
1614                }
1615                acc.add(c0 * r.powf(a) * bessel(j)?);
1616            }
1617            Ok(acc.sum())
1618        };
1619
1620        let mut slice_derivs = [0.0_f64; 4];
1621        for slot in slice_derivs.iter_mut() {
1622            // Differentiate the current term list once.
1623            let mut next: Vec<(f64, f64, i32)> = Vec::with_capacity(terms.len() * 3);
1624            for &(c0, a, j) in &terms {
1625                if c0 == 0.0 {
1626                    continue;
1627                }
1628                if a != 0.0 {
1629                    next.push((c0 * a, a - 1.0, j));
1630                }
1631                let half = -c0 * c * 0.5;
1632                next.push((half, a, j - 1));
1633                next.push((half, a, j + 1));
1634            }
1635            terms = next;
1636            *slot = evaluate(&terms)?;
1637        }
1638
1639        d1.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[0]);
1640        d2.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[1]);
1641        d3.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[2]);
1642        d4.add(weight * (1.0 - w).powf(p - 1.0) * w.powf(s - 1.0) * slice_derivs[3]);
1643    }
1644
1645    let phi1 = pref * d1.sum();
1646    let phi2 = pref * d2.sum();
1647    let phi3 = pref * d3.sum();
1648    let phi4 = pref * d4.sum();
1649    if !(phi1.is_finite() && phi2.is_finite() && phi3.is_finite() && phi4.is_finite()) {
1650        crate::bail_invalid_basis!(
1651            "non-finite Duchon hybrid operator (stable form) at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1652        );
1653    }
1654
1655    // Assemble the operator scalars from the radial derivatives. For r > 0
1656    // these divisions are removable-singularity quotients of moderate
1657    // quantities (no cancellation between blocks remains).
1658    let inv_r = 1.0 / r;
1659    let q = phi1 * inv_r;
1660    // q′ = φ″/r − φ′/r²; q″ = φ‴/r − 2φ″/r² + 2φ′/r³;
1661    // q‴ = φ⁗/r − 3φ‴/r² + 6φ″/r³ − 6φ′/r⁴.
1662    let q_r = phi2 * inv_r - phi1 * inv_r * inv_r;
1663    let q_rr = phi3 * inv_r - 2.0 * phi2 * inv_r * inv_r + 2.0 * phi1 * inv_r * inv_r * inv_r;
1664    let q_rrr = phi4 * inv_r - 3.0 * phi3 * inv_r * inv_r + 6.0 * phi2 * inv_r * inv_r * inv_r
1665        - 6.0 * phi1 * inv_r * inv_r * inv_r * inv_r;
1666    let t = q_r * inv_r;
1667    let t_r = q_rr * inv_r - q_r * inv_r * inv_r;
1668    let t_rr = q_rrr * inv_r - 2.0 * q_rr * inv_r * inv_r + 2.0 * q_r * inv_r * inv_r * inv_r;
1669
1670    Ok(DuchonRegularizedOperatorCore { q, t, t_r, t_rr })
1671}
1672
1673/// Whether the cancellation-free [`duchon_hybrid_kernel_stable_integral`] is
1674/// applicable for these orders: a genuine Matérn blend (`s ≥ 1`) whose
1675/// single-integral reduction has an integrable `w → 0` endpoint (`2p < d`).
1676///
1677/// The complementary cases — `s = 0` (pure polyharmonic, already evaluated
1678/// directly with no cancellation) and `2p ≥ d` (only reachable at low `d`,
1679/// where the partial-fraction sum has no meaningful cancellation) — retain the
1680/// existing partial-fraction path.
1681#[inline]
1682pub(crate) fn duchon_hybrid_stable_integral_applies(
1683    p_order: usize,
1684    s_order: usize,
1685    k_dim: usize,
1686) -> bool {
1687    s_order >= 1 && 2 * p_order < k_dim
1688}
1689
1690pub(crate) fn duchon_matern_kernel_general_from_distance(
1691    r: f64,
1692    length_scale: Option<f64>,
1693    p_order: usize,
1694    s_order: usize,
1695    k_dim: usize,
1696    coeffs: Option<&DuchonPartialFractionCoeffs>,
1697) -> Result<f64, BasisError> {
1698    if !r.is_finite() || r < 0.0 {
1699        crate::bail_invalid_basis!("Duchon kernel distance must be finite and non-negative");
1700    }
1701    let Some(length_scale) = length_scale else {
1702        return Ok(polyharmonic_kernel(
1703            r,
1704            pure_duchon_block_order(p_order, s_order as f64),
1705            k_dim,
1706        ));
1707    };
1708    if !length_scale.is_finite() || length_scale <= 0.0 {
1709        crate::bail_invalid_basis!("Duchon hybrid length_scale must be finite and positive");
1710    }
1711    let kappa = 1.0 / length_scale;
1712
1713    // gam#1424: for genuine high-dimensional Matérn blends the partial-fraction
1714    // sum below cancels catastrophically (the largest block dwarfs the true
1715    // ~1e-13 kernel value, destroying every significant digit and the PSD
1716    // property of the Gram matrix). Evaluate those orders with the
1717    // cancellation-free single-integral form instead — it also handles the
1718    // `r = 0` diagonal in closed form, so it short-circuits before the
1719    // near-collision Taylor branch.
1720    if duchon_hybrid_stable_integral_applies(p_order, s_order, k_dim) {
1721        return duchon_hybrid_kernel_stable_integral(r, kappa, p_order, s_order, k_dim);
1722    }
1723
1724    let coeffs_local;
1725    let coeffs_ref = if let Some(c) = coeffs {
1726        c
1727    } else {
1728        coeffs_local = duchon_partial_fraction_coeffs(p_order, s_order, kappa);
1729        &coeffs_local
1730    };
1731    let collision_taylor_radius = DUCHON_COLLISION_TAYLOR_REL * length_scale.max(1e-8);
1732    // The near-collision Taylor expansion uses phi(0) plus even-order
1733    // derivative collision limits. Those limits only exist when the kernel
1734    // is finite at the origin, i.e. when 2(p+s) > d. Below that threshold
1735    // the partial-fraction blocks individually diverge at r=0 but their
1736    // sum is still a well-defined function for any r > 0 (each Bessel-K
1737    // and r^{2m-d}-type block is finite away from origin). Fall through
1738    // to the direct sum in that regime; r=0 itself remains an error.
1739    let kernel_finite_at_origin = 2 * (p_order + s_order) > k_dim;
1740    if r <= collision_taylor_radius && kernel_finite_at_origin {
1741        return duchon_hybrid_kernel_near_collision_value(
1742            r,
1743            length_scale,
1744            p_order,
1745            s_order,
1746            k_dim,
1747            coeffs_ref,
1748        );
1749    }
1750    let mut val = KahanSum::default();
1751    for (m, coeff) in coeffs_ref.a.iter().enumerate().skip(1) {
1752        if *coeff == 0.0 {
1753            continue;
1754        }
1755        val.add(coeff * polyharmonic_kernel(r, (m) as f64, k_dim));
1756    }
1757    for (n, coeff) in coeffs_ref.b.iter().enumerate().skip(1) {
1758        if *coeff == 0.0 {
1759            continue;
1760        }
1761        val.add(coeff * duchon_matern_block(r, kappa, n, k_dim)?);
1762    }
1763    Ok(val.sum())
1764}
1765
1766pub(crate) fn duchon_hybrid_kernel_collision_value(
1767    length_scale: f64,
1768    p_order: usize,
1769    s_order: usize,
1770    k_dim: usize,
1771    coeffs: &DuchonPartialFractionCoeffs,
1772) -> Result<f64, BasisError> {
1773    let spectral_order = 2 * (p_order + s_order);
1774    if spectral_order <= k_dim {
1775        crate::bail_invalid_basis!(
1776            "Duchon hybrid diagonal is not finite when 2*(p+s) <= dimension; got 2*(p+s)={spectral_order}, dimension={k_dim}, p={p_order}, s={s_order}"
1777        );
1778    }
1779
1780    let kappa = 1.0 / length_scale.max(1e-300);
1781    let mut pure = KahanSum::default();
1782    let mut log_part = KahanSum::default();
1783    for (m, &a_m) in coeffs.a.iter().enumerate().skip(1) {
1784        if a_m == 0.0 {
1785            continue;
1786        }
1787        let (block_pure, block_log) = duchon_polyharmonic_block_taylor_r2j(m, k_dim, 0);
1788        pure.add(a_m * block_pure);
1789        log_part.add(a_m * block_log);
1790    }
1791    for (n, &b_n) in coeffs.b.iter().enumerate().skip(1) {
1792        if b_n == 0.0 {
1793            continue;
1794        }
1795        let (block_pure, block_log) = duchon_matern_block_taylor_r2j(kappa, n, k_dim, 0);
1796        pure.add(b_n * block_pure);
1797        log_part.add(b_n * block_log);
1798    }
1799    let value = pure.sum();
1800    let log_value = log_part.sum();
1801    if log_value.abs() > 1e-8 * value.abs().max(1e-30) {
1802        crate::bail_invalid_basis!(
1803            "Duchon hybrid diagonal log terms did not cancel: log={log_value:.6e}, value={value:.6e}; p={p_order}, s={s_order}, d={k_dim}"
1804        );
1805    }
1806    if !value.is_finite() {
1807        crate::bail_invalid_basis!(
1808            "non-finite Duchon hybrid diagonal value for p={p_order}, s={s_order}, d={k_dim}"
1809        );
1810    }
1811    Ok(value)
1812}
1813
1814pub(crate) fn duchon_hybrid_kernel_near_collision_value(
1815    r: f64,
1816    length_scale: f64,
1817    p_order: usize,
1818    s_order: usize,
1819    k_dim: usize,
1820    coeffs: &DuchonPartialFractionCoeffs,
1821) -> Result<f64, BasisError> {
1822    let mut value =
1823        duchon_hybrid_kernel_collision_value(length_scale, p_order, s_order, k_dim, coeffs)?;
1824    if r == 0.0 {
1825        return Ok(value);
1826    }
1827
1828    // Radial Taylor expansion about the center collision:
1829    //
1830    //   phi(r) = phi(0)
1831    //          + phi''(0) r^2 / 2
1832    //          + phi''''(0) r^4 / 24
1833    //          + phi''''''(0) r^6 / 720 + ...
1834    //
1835    // Odd terms vanish for an isotropic radial kernel. A finite 2q-th
1836    // derivative at zero requires spectral smoothness 2(p+s) > d + 2q.
1837    // Terms whose collision derivative does not exist are omitted; this is
1838    // still strictly better than evaluating the raw partial-fraction sum at a
1839    // tiny nonzero radius, where large singular components cancel only after
1840    // losing many digits.
1841    let smoothness_order = 2 * (p_order + s_order);
1842    let r2 = r * r;
1843    if smoothness_order > k_dim + 2 {
1844        let (phi_rr, _, _) =
1845            duchonphi_rr_collision_psi_triplet(length_scale, p_order, s_order, k_dim, coeffs)?;
1846        value += 0.5 * phi_rr * r2;
1847    }
1848    if smoothness_order > k_dim + 4 {
1849        let phi_rrrr = duchon_phi_rrrr_collision(length_scale, p_order, s_order, k_dim, coeffs)?;
1850        value += (1.0 / 24.0) * phi_rrrr * r2 * r2;
1851    }
1852    if smoothness_order > k_dim + 6 {
1853        let phi_rrrrrr =
1854            duchon_phi_rrrrrr_collision(length_scale, p_order, s_order, k_dim, coeffs)?;
1855        value += (1.0 / 720.0) * phi_rrrrrr * r2 * r2 * r2;
1856    }
1857    if !value.is_finite() {
1858        crate::bail_invalid_basis!(
1859            "non-finite Duchon hybrid near-collision value at r={r}, p={p_order}, s={s_order}, d={k_dim}"
1860        );
1861    }
1862    Ok(value)
1863}
1864
1865#[inline(always)]
1866pub(crate) fn stable_euclidean_norm<I>(components: I) -> f64
1867where
1868    I: IntoIterator<Item = f64>,
1869{
1870    let mut scale = 0.0_f64;
1871    let mut sumsq = 1.0_f64;
1872    let mut has_nonzero = false;
1873    for component in components {
1874        let abs = component.abs();
1875        if abs == 0.0 {
1876            continue;
1877        }
1878        if !abs.is_finite() {
1879            return f64::INFINITY;
1880        }
1881        if !has_nonzero {
1882            scale = abs;
1883            has_nonzero = true;
1884            continue;
1885        }
1886        if scale < abs {
1887            let ratio = scale / abs;
1888            sumsq = 1.0 + sumsq * ratio * ratio;
1889            scale = abs;
1890        } else {
1891            let ratio = abs / scale;
1892            sumsq += ratio * ratio;
1893        }
1894    }
1895    if has_nonzero {
1896        scale * sumsq.sqrt()
1897    } else {
1898        0.0
1899    }
1900}
1901
1902#[inline]
1903pub(crate) fn centered_aniso_log_scale_mean(eta: &[f64]) -> f64 {
1904    if eta.len() <= 1 {
1905        0.0
1906    } else {
1907        eta.iter().sum::<f64>() / eta.len() as f64
1908    }
1909}
1910
1911#[inline]
1912pub(crate) fn centered_aniso_log_scale(value: f64, mean: f64) -> f64 {
1913    // This bound exists solely to keep the downstream `.exp()` (axis scale and
1914    // metric weight) finite. `f64::clamp` leaves NaN as NaN, so a non-finite
1915    // contrast (e.g. an `inf − inf` from a degenerate anisotropy `eta`) would
1916    // slip through and poison the Gram matrix. Map any non-finite difference to
1917    // the saturating bound explicitly; finite inputs take the identical clamp.
1918    let centered = value - mean;
1919    if centered.is_finite() {
1920        centered.clamp(-50.0, 50.0)
1921    } else if centered > 0.0 {
1922        50.0
1923    } else {
1924        -50.0
1925    }
1926}
1927
1928#[inline]
1929pub(crate) fn aniso_axis_scale(value: f64, mean: f64) -> f64 {
1930    centered_aniso_log_scale(value, mean).exp()
1931}
1932
1933#[inline]
1934pub(crate) fn aniso_metric_weight(value: f64, mean: f64) -> f64 {
1935    (2.0 * centered_aniso_log_scale(value, mean)).exp()
1936}
1937
1938pub(crate) fn centered_aniso_metric_weights(eta: &[f64]) -> Vec<f64> {
1939    let mean = centered_aniso_log_scale_mean(eta);
1940    eta.iter()
1941        .map(|&value| aniso_metric_weight(value, mean))
1942        .collect()
1943}
1944
1945/// Compute anisotropic squared distance components and total distance.
1946///
1947/// This is the core of **geometric anisotropy**: a linear warp Λ = diag(κ_a)
1948/// turns ellipsoidal correlation contours into isotropic ones. Writing h = x − c,
1949/// z = Λh, the anisotropic distance is r = |z| = |Λh|.
1950///
1951/// We decompose Λ = κ · A where det(A) = 1, parameterized as
1952///   ψ_a = ψ̄ + η_a,   Σ η_a = 0
1953/// where ψ̄ is the global scale (existing scalar κ) and η_a are d−1 anisotropy
1954/// contrasts. This separates scale from shape and preserves the Duchon scaling
1955/// law φ(r;κ) = κ^δ H(κr) for the global part.
1956///
1957/// Given per-axis log-scales `eta`, the identifiable centered contrasts are
1958/// ψ_a = eta_a - mean(eta). The metric uses those contrasts so Σ_a ψ_a = 0
1959/// even when a caller passes an uncentered vector:
1960///
1961///   r = √( Σ_a exp(2·ψ_a) · (x_a - c_a)² )
1962///
1963/// Returns `(r, s_vec)` where `s_vec[a] = exp(2·ψ_a) · h_a²` is the
1964/// per-axis weighted squared displacement. These components are needed for
1965/// per-axis derivatives: `∂φ/∂ψ_a = q · s_a`.
1966///
1967/// The derivative chain through r gives:
1968///   ∇_ψ r      = s / r
1969///   ∇²_ψ r     = (2/r) Diag(s) − (1/r³) ss'
1970/// which is diagonal + rank-1, so Hessian-vector products are O(d).
1971#[inline]
1972pub(crate) fn aniso_distance_and_components(
1973    data_row: &[f64],
1974    center: &[f64],
1975    eta: &[f64],
1976) -> (f64, Vec<f64>) {
1977    assert_eq!(data_row.len(), center.len());
1978    assert_eq!(data_row.len(), eta.len());
1979    let d = data_row.len();
1980    let eta_mean = centered_aniso_log_scale_mean(eta);
1981    let mut s_vec = Vec::with_capacity(d);
1982    let mut scaled_components = Vec::with_capacity(d);
1983    for a in 0..d {
1984        let h_a = data_row[a] - center[a];
1985        // Clamp exp(2ψ) to avoid overflow/underflow: ψ in [-50, 50].
1986        let scale_a = aniso_axis_scale(eta[a], eta_mean);
1987        let scaled_h_a = scale_a * h_a;
1988        let s_a = scaled_h_a * scaled_h_a;
1989        scaled_components.push(scaled_h_a);
1990        s_vec.push(s_a);
1991    }
1992    (stable_euclidean_norm(scaled_components), s_vec)
1993}
1994
1995/// Compute anisotropic distance without returning per-axis components.
1996///
1997/// This is the lightweight version of [`aniso_distance_and_components`] for
1998/// call sites that only need the scalar distance `r`.
1999#[inline]
2000pub(crate) fn aniso_distance(data_row: &[f64], center: &[f64], eta: &[f64]) -> f64 {
2001    assert_eq!(data_row.len(), center.len());
2002    assert_eq!(data_row.len(), eta.len());
2003    let eta_mean = centered_aniso_log_scale_mean(eta);
2004    stable_euclidean_norm(
2005        (0..data_row.len()).map(|a| aniso_axis_scale(eta[a], eta_mean) * (data_row[a] - center[a])),
2006    )
2007}
2008
2009#[inline(always)]
2010pub(crate) fn euclidean_distance_rows(
2011    lhs: ArrayView2<'_, f64>,
2012    lhs_row: usize,
2013    rhs: ArrayView2<'_, f64>,
2014    rhs_row: usize,
2015) -> f64 {
2016    assert_eq!(lhs.ncols(), rhs.ncols());
2017    stable_euclidean_norm((0..lhs.ncols()).map(|axis| lhs[[lhs_row, axis]] - rhs[[rhs_row, axis]]))
2018}
2019
2020#[inline(always)]
2021pub(crate) fn aniso_axis_scales(eta: &[f64]) -> Vec<f64> {
2022    let eta_mean = centered_aniso_log_scale_mean(eta);
2023    eta.iter()
2024        .map(|&value| aniso_axis_scale(value, eta_mean))
2025        .collect()
2026}
2027
2028#[inline(always)]
2029pub(crate) fn aniso_distance_rows_with_scales(
2030    lhs: ArrayView2<'_, f64>,
2031    lhs_row: usize,
2032    rhs: ArrayView2<'_, f64>,
2033    rhs_row: usize,
2034    axis_scales: &[f64],
2035) -> f64 {
2036    assert_eq!(lhs.ncols(), rhs.ncols());
2037    assert_eq!(lhs.ncols(), axis_scales.len());
2038    stable_euclidean_norm(
2039        (0..lhs.ncols())
2040            .map(|axis| axis_scales[axis] * (lhs[[lhs_row, axis]] - rhs[[rhs_row, axis]])),
2041    )
2042}
2043
2044pub(crate) fn fill_symmetric_from_row_kernel<F>(
2045    matrix: &mut Array2<f64>,
2046    kernel: F,
2047) -> Result<(), BasisError>
2048where
2049    F: Fn(usize, usize) -> Result<f64, BasisError> + Sync,
2050{
2051    assert_eq!(matrix.nrows(), matrix.ncols());
2052    let k = matrix.nrows();
2053    // The kernels passed here are pure functions of the (symmetric) pairwise
2054    // center distance, so `kernel(i, j) == kernel(j, i)`. Evaluate only the
2055    // upper triangle (including the diagonal) in parallel — each row task
2056    // touches only its own `j >= i` cells, so the borrows stay disjoint — then
2057    // mirror into the lower triangle. This halves the (sqrt + special-function)
2058    // kernel evaluations relative to filling every cell independently, with no
2059    // change to the resulting matrix (still exactly symmetric).
2060    matrix
2061        .axis_iter_mut(Axis(0))
2062        .into_par_iter()
2063        .enumerate()
2064        .try_for_each(|(i, mut row)| {
2065            for j in i..k {
2066                row[j] = kernel(i, j)?;
2067            }
2068            Ok::<(), BasisError>(())
2069        })?;
2070    for i in 1..k {
2071        for j in 0..i {
2072            matrix[[i, j]] = matrix[[j, i]];
2073        }
2074    }
2075    Ok(())
2076}
2077
2078/// Return y-space points `y_{i,a} = exp(ψ_a) x_{i,a}` with
2079/// `ψ_a = η_a - mean(η)` so Euclidean pairwise
2080/// distances in y equal anisotropic kernel distances in x:
2081///   |y_i - y_j|² = Σ_a exp(2 ψ_a) (x_{i,a} - x_{j,a})² = aniso_distance²(x_i, x_j, η).
2082/// Use this before `pairwise_distance_bounds` whenever κ conditioning
2083/// bounds must match the kernel's actual metric (anisotropic case). For
2084/// isotropic terms, pass `None` and keep using the raw centers.
2085pub(crate) fn points_in_aniso_y_space(points: ArrayView2<'_, f64>, eta: &[f64]) -> Array2<f64> {
2086    assert_eq!(points.ncols(), eta.len());
2087    let mut y = points.to_owned();
2088    let eta_mean = centered_aniso_log_scale_mean(eta);
2089    let weights: Vec<f64> = eta.iter().map(|&e| aniso_axis_scale(e, eta_mean)).collect();
2090    for a in 0..eta.len() {
2091        let w_a = weights[a];
2092        y.column_mut(a).mapv_inplace(|v| v * w_a);
2093    }
2094    y
2095}
2096
2097/// Compute per-axis standard deviations of knot center coordinates.
2098///
2099/// Returns σ_a for each axis column of `centers`. Axes with zero variance
2100/// (constant column) get σ_a = 1.0. All values are clamped to [1e-6, 1e6].
2101pub fn knot_cloud_axis_scales(centers: ArrayView2<'_, f64>) -> Vec<f64> {
2102    let k = centers.nrows();
2103    let d = centers.ncols();
2104    if k < 2 || d == 0 {
2105        return vec![1.0; d];
2106    }
2107    let n = k as f64;
2108    let mut scales = Vec::with_capacity(d);
2109    for a in 0..d {
2110        let col = centers.column(a);
2111        let mean = col.sum() / n;
2112        let var = col.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / (n - 1.0);
2113        let sigma = var.sqrt();
2114        // If variance is zero (constant column), use 1.0 (no scaling).
2115        let sigma = if sigma < 1e-12 { 1.0 } else { sigma };
2116        scales.push(sigma.clamp(1e-6, 1e6));
2117    }
2118    scales
2119}
2120
2121/// Compute initial anisotropy contrasts η_a from knot center geometry.
2122///
2123/// Returns η_a = −ln(σ_a) + (1/d) Σ_b ln(σ_b), which satisfies Ση_a = 0
2124/// by construction. Axes with more spread get negative η_a (smaller κ_a,
2125/// longer correlation range), axes with less spread get positive η_a.
2126///
2127/// If d ≤ 1, returns an empty vector (anisotropy is meaningless for 1-D).
2128pub fn initial_aniso_contrasts(centers: ArrayView2<'_, f64>) -> Vec<f64> {
2129    let d = centers.ncols();
2130    if d <= 1 {
2131        return Vec::new();
2132    }
2133    let scales = knot_cloud_axis_scales(centers);
2134    let mean_neg_log: f64 = scales.iter().map(|&s| -s.ln()).sum::<f64>() / d as f64;
2135    // η_a = −ln(σ_a) + (1/d) Σ_b ln(σ_b)
2136    //     = −ln(σ_a) − mean(−ln(σ_b))
2137    //     = neg_log_scales[a] − mean(neg_log_scales)
2138    scales
2139        .iter()
2140        .map(|&scale| -scale.ln() - mean_neg_log)
2141        .collect()
2142}
2143
2144/// Pure forward transform of the supplied anisotropy log-scales: subtract the
2145/// mean (so Σ η = 0) and zero tiny residuals. `None` (or a 1-D problem, where
2146/// centering is a no-op) means *no* anisotropy.
2147///
2148/// This is a **continuous function of η with no hidden data dependence**: an
2149/// explicit all-zero vector centers to all-zero, i.e. the isotropic metric
2150/// (weights `exp(2·0) = 1`, Euclidean radius). It is therefore identical, as a
2151/// design, to the `None` path through `η = 0`, and is continuous across it —
2152/// `[1e-9, -1e-9]` and `[0, 0]` map to neighboring designs, not a jump.
2153///
2154/// The Matérn input-location jet/Hessian (`matern_metric_weights`, the public
2155/// `matern_input_location_first_jet`/`_hessian` FFI) and the `UserProvided`-center
2156/// forward design both apply *this* transform, so the jet differentiates exactly
2157/// the function the public design evaluates (#437), and an explicit isotropic
2158/// request reduces to the closed-form isotropic Matérn kernel rather than a
2159/// data-driven anisotropic one (#1042).
2160///
2161/// Auto-initialization of `η` from knot-cloud geometry is a *separate* concern
2162/// handled by [`auto_seed_aniso_contrasts`]; it is reserved for callers that
2163/// opt into data-derived geometry (the κ-optimizer's data-driven center
2164/// strategies and the pure-Duchon `scale_dims` path), selected by
2165/// [`resolve_matern_forward_aniso`].
2166pub(crate) fn centered_aniso_contrasts(aniso: Option<&[f64]>) -> Option<Vec<f64>> {
2167    match aniso {
2168        Some(v) if v.len() > 1 => Some(center_aniso_log_scales(v)),
2169        Some(v) => Some(v.to_vec()),
2170        None => None,
2171    }
2172}
2173
2174/// Auto-seed anisotropy contrasts from knot-cloud geometry for callers that use
2175/// an all-zero vector as the "initialize me" sentinel.
2176///
2177/// Used by (a) the pure-Duchon `scale_dims` path, where `η` is a FIXED,
2178/// geometry-derived basis parameter that is never enrolled as a REML hyper-axis
2179/// (see `spatial_term_supports_hyper_optimization`): "standardize the geometry,
2180/// then learn the smoothness"; and (b) the Matérn forward design when the term
2181/// uses a **data-driven** center strategy, i.e. the κ-optimizer's seeding
2182/// sentinel (the optimizer's analytic ψ-gradient is computed against the same
2183/// auto-seeded design, so the pair stays consistent). A non-zero (or absent)
2184/// vector is honored verbatim (centered, exactly like [`centered_aniso_contrasts`]);
2185/// only an *exactly* all-zero vector is replaced by `initial_aniso_contrasts(centers)`.
2186///
2187/// A `UserProvided`-center Matérn term does NOT use this — its geometry is fully
2188/// caller-specified, so an explicit all-zero η must be honored literally; folding
2189/// the geometry seed into that path made the public design discontinuous at
2190/// `η = 0` and hijacked explicit isotropic requests (#1042).
2191pub(crate) fn auto_seed_aniso_contrasts(
2192    centers: ArrayView2<'_, f64>,
2193    aniso: Option<&[f64]>,
2194) -> Option<Vec<f64>> {
2195    let eta = match aniso {
2196        Some(v) if v.len() > 1 => v,
2197        Some(v) => return Some(v.to_vec()),
2198        None => return None,
2199    };
2200    let all_zero = eta.iter().all(|&e| e == 0.0);
2201    if !all_zero {
2202        return Some(center_aniso_log_scales(eta));
2203    }
2204    let contrasts = initial_aniso_contrasts(centers);
2205    if contrasts.is_empty() {
2206        Some(center_aniso_log_scales(eta))
2207    } else {
2208        Some(center_aniso_log_scales(&contrasts))
2209    }
2210}
2211
2212fn center_aniso_log_scales(eta: &[f64]) -> Vec<f64> {
2213    if eta.len() <= 1 {
2214        return eta.to_vec();
2215    }
2216    let mean = eta.iter().sum::<f64>() / eta.len() as f64;
2217    eta.iter()
2218        .map(|&v| {
2219            let centered = v - mean;
2220            if centered.abs() <= 1e-15 {
2221                0.0
2222            } else {
2223                centered
2224            }
2225        })
2226        .collect()
2227}
2228
2229/// How the Matérn forward design build interprets an *exactly all-zero*
2230/// `aniso_log_scales` vector.
2231#[derive(Clone, Copy, Debug, PartialEq, Eq)]
2232pub enum AnisoSeedMode {
2233    /// All-zero `η` is the κ-optimizer / `scale_dims` seeding sentinel: replace
2234    /// it with geometry-derived contrasts from the knot cloud
2235    /// ([`auto_seed_aniso_contrasts`]). This is the default for every internal
2236    /// build entry; the optimizer's analytic ψ-gradient is computed against the
2237    /// same auto-seeded design, so value/gradient stay consistent. Note that by
2238    /// the time the κ-optimizer rebuilds a frozen design the center strategy has
2239    /// usually been resolved to `UserProvided`, so center provenance cannot be
2240    /// used to distinguish this from a genuine literal request — the mode must
2241    /// be carried explicitly.
2242    AutoSeedFromGeometry,
2243    /// All-zero `η` is an explicit isotropic request and is honored literally
2244    /// ([`centered_aniso_contrasts`]): the design reduces to the closed-form
2245    /// isotropic Matérn and varies continuously through `η = 0`. The public
2246    /// `matern_basis` FFI (and its input-location jet/Hessian) selects this so a
2247    /// caller's explicit isotropic request is not hijacked into a data-driven
2248    /// anisotropic kernel (#1042).
2249    Literal,
2250}
2251
2252/// Resolve the anisotropy contrasts the Matérn forward design build applies,
2253/// dispatching on the explicit [`AnisoSeedMode`].
2254pub(crate) fn resolve_matern_forward_aniso(
2255    mode: AnisoSeedMode,
2256    centers: ArrayView2<'_, f64>,
2257    aniso: Option<&[f64]>,
2258) -> Option<Vec<f64>> {
2259    match mode {
2260        AnisoSeedMode::Literal => centered_aniso_contrasts(aniso),
2261        AnisoSeedMode::AutoSeedFromGeometry => auto_seed_aniso_contrasts(centers, aniso),
2262    }
2263}
2264
2265pub(crate) fn pairwise_distance_bounds(points: ArrayView2<'_, f64>) -> Option<(f64, f64)> {
2266    let n = points.nrows();
2267    let d = points.ncols();
2268    if n < 2 || d == 0 {
2269        return None;
2270    }
2271    let mut r_min = f64::INFINITY;
2272    let mut r_max = 0.0_f64;
2273    for i in 0..n {
2274        for j in (i + 1)..n {
2275            let r = stable_euclidean_norm((0..d).map(|c| points[[i, c]] - points[[j, c]]));
2276            if r.is_finite() && r > 0.0 {
2277                r_min = r_min.min(r);
2278                r_max = r_max.max(r);
2279            }
2280        }
2281    }
2282    if r_min.is_finite() && r_max.is_finite() && r_min > 0.0 && r_max > 0.0 {
2283        Some((r_min, r_max))
2284    } else {
2285        None
2286    }
2287}
2288
2289/// Capped-sample pairwise distance bounds for large point clouds.
2290///
2291/// Returns `(r_min_hat, r_max_hat)` such that:
2292/// - `r_max_hat <= true r_max`  (pairwise max over a sub-sample is monotone
2293///    in the sample, so the sampled max underestimates the true max).
2294/// - `r_min_hat >= true r_min`  (pairwise min over a sub-sample can only
2295///    exclude some pairs, so the sampled min overestimates the true min).
2296///
2297/// Both approximations are conservative for κ-bound derivation:
2298///   kappa_lo = 1e-2 / r_max_hat  >=  1e-2 / true r_max  (wider window, low κ)
2299///   kappa_hi = 1e2  / r_min_hat  <=  1e2  / true r_min  (tighter window, high κ)
2300/// so no feasible κ that the exact bound would include is excluded by the
2301/// approximation — it can only slightly shrink the high-κ tail, which is
2302/// exactly the regime (κ → ∞ ⇒ degenerate kernel) that we want the outer
2303/// optimizer to avoid anyway.
2304///
2305/// Sampling picks `K = 1024` indices spaced evenly across the FULL index range
2306/// `[0, n-1]` (endpoints included): `idx(s) = round(s·(n-1)/(K-1))`. For a cap
2307/// of `K = 1024` and n up to ~10⁹ this yields O(K²·d) work per call — a few
2308/// hundred μs. For n ≤ K the exact pairwise is used.
2309///
2310/// #1033: spanning the full range (rather than the contiguous prefix `0,
2311/// stride, …, (K-1)·stride` that an integer `stride = n/K` produces — which
2312/// floors to `1` for every n in `(K, 2K]` and so visits only indices `0..K`,
2313/// dropping the entire tail of the cloud) is what makes the diameter estimate
2314/// `r_max_hat` n-STABLE. The κ/ψ window is derived once from `r_max_hat`
2315/// (`psi_lo = ln(diameter_fraction / r_max_hat)`); a prefix-sampled `r_max_hat`
2316/// shrinks with the prefix's spatial extent as n crosses `K`, which silently
2317/// makes the outer optimizer's box — and therefore its whole trajectory —
2318/// n-dependent. Even spacing keeps `r_max_hat` ≈ the true cloud diameter at
2319/// every n, so the sufficient-statistic outer loop touches the same ψ window
2320/// regardless of n.
2321pub(crate) fn pairwise_distance_bounds_sampled(points: ArrayView2<'_, f64>) -> Option<(f64, f64)> {
2322    const K_CAP: usize = 1024;
2323    let n = points.nrows();
2324    let d = points.ncols();
2325    if n < 2 || d == 0 {
2326        return None;
2327    }
2328    if n <= K_CAP {
2329        return pairwise_distance_bounds(points);
2330    }
2331    // Evenly spaced indices spanning `[0, n-1]` inclusive (n > K_CAP ⇒ k = K_CAP
2332    // ≥ 2, so the denominator is positive and `idx(0)=0`, `idx(k-1)=n-1`). The
2333    // spacing `(n-1)/(k-1) > 1`, so distinct `s` map to distinct rows; any rare
2334    // rounding collision is harmless (the `r > 0.0` guard drops a zero pair).
2335    let k = K_CAP;
2336    let denom = (k - 1) as f64;
2337    let span = (n - 1) as f64;
2338    let sample_index = |s: usize| -> usize { ((s as f64) * span / denom).round() as usize };
2339    let mut r_min = f64::INFINITY;
2340    let mut r_max = 0.0_f64;
2341    for i_idx in 0..k {
2342        let i = sample_index(i_idx);
2343        for j_idx in (i_idx + 1)..k {
2344            let j = sample_index(j_idx);
2345            let r = stable_euclidean_norm((0..d).map(|c| points[[i, c]] - points[[j, c]]));
2346            if r.is_finite() && r > 0.0 {
2347                r_min = r_min.min(r);
2348                r_max = r_max.max(r);
2349            }
2350        }
2351    }
2352    if r_min.is_finite() && r_max.is_finite() && r_min > 0.0 && r_max > 0.0 {
2353        Some((r_min, r_max))
2354    } else {
2355        None
2356    }
2357}
2358
2359#[cfg(test)]
2360mod duchon_hybrid_psd_tests {
2361    use super::*;
2362    use faer::Side;
2363    use gam_linalg::faer_ndarray::FaerEigh;
2364
2365    /// #1033: the capped-sample diameter estimate must be n-STABLE on a fixed
2366    /// point cloud. A uniform grid on `[-3, 3]` has a fixed true diameter (6.0)
2367    /// and minimum spacing that shrinks like `6/(n-1)` regardless of how finely
2368    /// it is sampled. The κ/ψ window is derived ONCE from `r_max_hat`, so if the
2369    /// sampler underestimates the diameter as n crosses the `K_CAP = 1024`
2370    /// threshold (the old `stride = n/K` prefix bug visited only indices
2371    /// `0..K`, i.e. the LEFT HALF of the domain for n in `(1024, 2048]`,
2372    /// halving `r_max_hat`), the outer optimizer's box — and hence its whole
2373    /// trajectory — becomes n-dependent, which is exactly the invariant #1033
2374    /// forbids. This pins `r_max_hat ≈ 6.0` across the threshold.
2375    #[test]
2376    fn sampled_diameter_is_n_stable_across_cap_threshold() {
2377        let grid = |n: usize| -> Array2<f64> {
2378            let mut x = Array2::<f64>::zeros((n, 1));
2379            for i in 0..n {
2380                x[[i, 0]] = (i as f64) / (n as f64 - 1.0) * 6.0 - 3.0;
2381            }
2382            x
2383        };
2384        // Below the cap (exact), straddling it, and well above it.
2385        let exact_diam = 6.0_f64;
2386        let mut last_rmax: Option<f64> = None;
2387        for &n in &[1000usize, 1025, 1500, 2000, 4000, 50_000] {
2388            let x = grid(n);
2389            let (r_min, r_max) =
2390                pairwise_distance_bounds_sampled(x.view()).expect("bounds for dense grid");
2391            // The sampled diameter must stay within 1% of the true 6.0 at every
2392            // n — NOT collapse to ~3.0 as the prefix bug did for n in (1024,2048].
2393            assert!(
2394                (r_max - exact_diam).abs() <= 0.01 * exact_diam,
2395                "sampled r_max at n={n} = {r_max:.6} drifted from the true diameter \
2396                 {exact_diam:.6}: the diameter estimate is n-dependent (#1033)"
2397            );
2398            // Cross-n stability: consecutive n's must agree on r_max to <2%.
2399            if let Some(prev) = last_rmax {
2400                let rel = (r_max - prev).abs() / exact_diam;
2401                assert!(
2402                    rel <= 0.02,
2403                    "sampled r_max jumped {rel:.4} (rel) between n steps near n={n}: \
2404                     {prev:.6} -> {r_max:.6}; outer-loop box is not n-stable (#1033)"
2405                );
2406            }
2407            last_rmax = Some(r_max);
2408            // r_min is positive and finite (the floor used for the high-κ ceiling).
2409            assert!(
2410                r_min.is_finite() && r_min > 0.0,
2411                "r_min must be positive at n={n}"
2412            );
2413        }
2414    }
2415
2416    /// The sampler's chosen indices must span the FULL range `[0, n-1]`
2417    /// (endpoints included) — the property that makes the diameter estimate
2418    /// stable. Reconstruct the index set the implementation uses and assert it
2419    /// reaches both ends with no contiguous-prefix clustering.
2420    #[test]
2421    fn sampled_indices_span_full_range() {
2422        const K_CAP: usize = 1024;
2423        let n = 2000usize; // stride = n/K_CAP would floor to 1 → prefix bug regime
2424        let k = K_CAP;
2425        let denom = (k - 1) as f64;
2426        let span = (n - 1) as f64;
2427        let idx = |s: usize| -> usize { ((s as f64) * span / denom).round() as usize };
2428        assert_eq!(idx(0), 0, "first sample must be index 0");
2429        assert_eq!(idx(k - 1), n - 1, "last sample must be the final index n-1");
2430        // The largest gap between consecutive samples must be ≈ (n-1)/(k-1),
2431        // i.e. roughly 2 here — NOT a single dense prefix followed by a void.
2432        let mut max_gap = 0usize;
2433        for s in 1..k {
2434            max_gap = max_gap.max(idx(s) - idx(s - 1));
2435        }
2436        assert!(
2437            max_gap <= 2,
2438            "evenly-spaced samples should step by ~{:.2}; saw a gap of {max_gap} \
2439             (prefix clustering would leave one huge gap)",
2440            span / denom
2441        );
2442    }
2443
2444    /// Deterministic, well-separated centers on `[-1, 1]^d` (a Halton-style
2445    /// low-discrepancy lattice over the radical-inverse base sequence). Mirrors
2446    /// the `4*d` random centers the Python fixture
2447    /// (`tests/test_python_api.py`'s high-dimensional hybrid Duchon penalty PSD
2448    /// check) draws, but without an RNG so the regression is byte-stable.
2449    fn fixture_centers(d: usize, n: usize) -> Array2<f64> {
2450        const BASES: [u64; 24] = [
2451            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
2452            89,
2453        ];
2454        let mut centers = Array2::<f64>::zeros((n, d));
2455        for i in 0..n {
2456            for axis in 0..d {
2457                let base = BASES[axis % BASES.len()];
2458                // Van der Corput radical inverse of (i + 1) in `base`, mapped to
2459                // [-1, 1]. Different axes use different primes, so the cloud is
2460                // affinely full-rank and spans the linear null space.
2461                let mut f = 1.0_f64;
2462                let mut idx = (i + 1) as u64;
2463                let mut value = 0.0_f64;
2464                while idx > 0 {
2465                    f /= base as f64;
2466                    value += f * (idx % base) as f64;
2467                    idx /= base;
2468                }
2469                centers[[i, axis]] = 2.0 * value - 1.0;
2470            }
2471        }
2472        centers
2473    }
2474
2475    /// Smallest symmetric eigenvalue of `matrix` (the matrix is symmetrized
2476    /// first; the constrained Duchon penalty is symmetric by construction).
2477    fn lambda_min(matrix: &Array2<f64>) -> f64 {
2478        let sym = symmetrize_penalty(matrix);
2479        let (evals, _) = FaerEigh::eigh(&sym, Side::Lower).expect("symmetric eigendecomposition");
2480        evals.iter().copied().fold(f64::INFINITY, f64::min)
2481    }
2482
2483    /// gam#1424: the (d=16, m=2, s=7) hybrid Duchon–Matérn fixture used to lose
2484    /// positive definiteness through catastrophic cancellation in the
2485    /// partial-fraction kernel expansion — the constrained, post-normalization
2486    /// penalty had λ_min ≈ −0.26442 even though the kernel's spectral density
2487    /// `ρ^{-2p}(κ²+ρ²)^{-s}` is nonnegative (so the true penalty is PSD). The
2488    /// kernel now routes through the cancellation-free single-integral form, so
2489    /// the spectrum is numerically PSD. This mirrors the production penalty path
2490    /// `duchon_constrained_bending_penalty` → `normalize_penalty`.
2491    #[test]
2492    fn high_dim_hybrid_penalty_is_numerically_psd_1424() {
2493        let d = 16usize;
2494        // m=2 ⇒ Linear null space. The cubic default spectral power is the
2495        // fractional (d-1)/2 = 7.5; the production hybrid config resolves it to
2496        // the integer spectral order the closed-form kernel consumes, s = 7
2497        // (`duchon_constrained_bending_penalty` itself takes the integer view via
2498        // `duchon_power_to_usize`, and the reroute predicate needs s ≥ 1). This is
2499        // the (d=16, m=2, s=7) fixture from the issue and the Python
2500        // `duchon_function_norm_penalty` PSD test.
2501        let (nullspace_order, default_power) = duchon_cubic_default(d);
2502        assert!(matches!(nullspace_order, DuchonNullspaceOrder::Linear));
2503        assert!(
2504            (default_power - 7.5).abs() < 1e-12,
2505            "cubic-default power for d=16 is 7.5"
2506        );
2507        let power = 7.0_f64;
2508        assert_eq!(duchon_power_to_usize(power), 7);
2509        // The reroute must engage for this fixture (s = 7 ≥ 1, 2p = 4 < d = 16).
2510        assert!(duchon_hybrid_stable_integral_applies(
2511            duchon_p_from_nullspace_order(nullspace_order),
2512            duchon_power_to_usize(power),
2513            d,
2514        ));
2515        let length_scale = Some(1.0_f64);
2516        let centers = fixture_centers(d, 4 * d);
2517
2518        let mut cache = BasisCacheContext::default();
2519        let z = kernel_constraint_nullspace(centers.view(), nullspace_order, &mut cache)
2520            .expect("constraint null space");
2521
2522        let omega = duchon_constrained_bending_penalty(
2523            centers.view(),
2524            length_scale,
2525            power,
2526            nullspace_order,
2527            None,
2528            &z,
2529        )
2530        .expect("constrained bending penalty assembles for the hybrid fixture");
2531        let (penalty, _scale) = normalize_penalty(&omega);
2532
2533        let lam_min = lambda_min(&penalty);
2534        assert!(
2535            lam_min >= -1e-10,
2536            "gam#1424: (d=16, m=2, s=7) hybrid penalty is not numerically PSD: \
2537             λ_min={lam_min:.6e} (was ≈ −0.26442 with the cancellation-prone \
2538             partial-fraction kernel)"
2539        );
2540    }
2541
2542    /// gam#1604: independent closed form for the hybrid-kernel origin value
2543    /// `φ(0) = F⁻¹[ρ^{-2p}(κ²+ρ²)^{-s}](0)` in `d` dimensions, derived by
2544    /// Schwinger-parametrizing both rational factors and evaluating the radial
2545    /// inverse-FT integral at `r = 0`:
2546    ///
2547    ///   φ(0) = (4π)^{-d/2} / Γ(s) · Γ(b) · κ^{-2b} · Γ(d/2 − p) / Γ(d/2),
2548    ///   b = p + s − d/2.
2549    ///
2550    /// Finite whenever `2(p+s) > d` and (for the Γ(d/2 − p) factor to avoid a
2551    /// pole) `d` is odd or `2p < d`. This reuses none of the Taylor-coefficient
2552    /// machinery under test, so it is a true oracle for the collision diagonal.
2553    fn phi0_closed_form(p: usize, s: usize, d: usize, kappa: f64) -> f64 {
2554        let half_d = 0.5 * d as f64;
2555        let b = p as f64 + s as f64 - half_d;
2556        (4.0 * std::f64::consts::PI).powf(-half_d) / gamma_lanczos(s as f64)
2557            * gamma_lanczos(b)
2558            * kappa.powf(-2.0 * b)
2559            * gamma_lanczos(half_d - p as f64)
2560            / gamma_lanczos(half_d)
2561    }
2562
2563    /// gam#1604 — the collision (r = 0) diagonal of the hybrid Duchon–Matérn
2564    /// kernel must equal the independent closed form above. The half-integer-ν
2565    /// Taylor coefficients that feed `duchon_hybrid_kernel_collision_value`
2566    /// previously miscounted the K_{l+½} polynomial degree (`l = 2|ν| − 1`
2567    /// instead of `|ν| − ½`), zeroing the r⁰ term of every |ν| ≥ 3/2 block and
2568    /// silently dropping their contribution to φ(0).
2569    #[test]
2570    fn hybrid_collision_diagonal_matches_closed_form_1604() {
2571        // Odd dimensions exercise the half-integer-ν path. For each, sweep p, s
2572        // (with 2(p+s) > d) and κ. d = 1, n ≥ 2 ⇒ ν ≥ 3/2 is the regressed case.
2573        for &d in &[1usize, 3, 5] {
2574            for &p in &[1usize, 2, 3] {
2575                for &s in &[1usize, 2, 3, 4] {
2576                    if 2 * (p + s) <= d {
2577                        continue;
2578                    }
2579                    for &kappa in &[0.5f64, 1.0, 2.5] {
2580                        let coeffs = duchon_partial_fraction_coeffs(p, s, kappa);
2581                        let got =
2582                            duchon_hybrid_kernel_collision_value(1.0 / kappa, p, s, d, &coeffs)
2583                                .expect("collision diagonal");
2584                        let want = phi0_closed_form(p, s, d, kappa);
2585                        let rel = (got - want).abs() / want.abs().max(1e-300);
2586                        assert!(
2587                            rel < 1e-10,
2588                            "φ(0) mismatch d={d} p={p} s={s} κ={kappa}: got {got:.12e}, want {want:.12e} (rel {rel:.2e})"
2589                        );
2590                    }
2591                }
2592            }
2593        }
2594    }
2595
2596    /// gam#1604 — the near-collision Taylor branch must be continuous with the
2597    /// direct partial-fraction sum: assembling φ(r) from φ(0), φ″(0), φ⁗(0),
2598    /// φ⁽⁶⁾(0) (all built from the same half-integer-ν Taylor coefficients) must
2599    /// match the cancellation-free direct block sum at a small radius where both
2600    /// are individually accurate. This exercises the j ≥ 1 coefficients (the
2601    /// diagonal test only pins j = 0).
2602    #[test]
2603    fn hybrid_near_collision_continuous_with_direct_1604() {
2604        for &d in &[1usize, 3] {
2605            for &p in &[1usize, 2] {
2606                for &s in &[2usize, 3] {
2607                    if 2 * (p + s) <= d + 6 {
2608                        // Need φ⁽⁶⁾(0) to exist for the full 6th-order Taylor.
2609                        continue;
2610                    }
2611                    for &kappa in &[0.5f64, 1.0, 2.0] {
2612                        let length_scale = 1.0 / kappa;
2613                        let coeffs = duchon_partial_fraction_coeffs(p, s, kappa);
2614                        // r small enough that the truncated 6th-order Taylor is
2615                        // accurate to ~r⁸, yet large enough that the direct block
2616                        // sum has not lost precision (d = 1/3, moderate κ).
2617                        let r = 0.02 * length_scale;
2618                        let taylor = duchon_hybrid_kernel_near_collision_value(
2619                            r,
2620                            length_scale,
2621                            p,
2622                            s,
2623                            d,
2624                            &coeffs,
2625                        )
2626                        .expect("near-collision value");
2627                        // Direct partial-fraction sum (real Bessel-K, no Taylor).
2628                        let mut direct = 0.0f64;
2629                        for (m, &a_m) in coeffs.a.iter().enumerate().skip(1) {
2630                            if a_m != 0.0 {
2631                                direct += a_m * polyharmonic_kernel(r, m as f64, d);
2632                            }
2633                        }
2634                        for (n, &b_n) in coeffs.b.iter().enumerate().skip(1) {
2635                            if b_n != 0.0 {
2636                                direct += b_n
2637                                    * duchon_matern_block(r, kappa, n, d).expect("matern block");
2638                            }
2639                        }
2640                        let rel = (taylor - direct).abs() / direct.abs().max(1e-300);
2641                        assert!(
2642                            rel < 1e-9,
2643                            "near-collision vs direct mismatch d={d} p={p} s={s} κ={kappa} r={r}: \
2644                             taylor {taylor:.12e}, direct {direct:.12e} (rel {rel:.2e})"
2645                        );
2646                    }
2647                }
2648            }
2649        }
2650    }
2651
2652    /// gam#1604 — the production constrained Duchon penalty `Ω_c = α²·ZᵀK_CC Z`
2653    /// for a `d = 1` hybrid smooth with power ≥ 2 must be numerically PSD across
2654    /// realistic length scales. Before the Taylor-degree fix the corrupted
2655    /// diagonal made `Ω_c ≈ Ω_true − δ·I` (δ = the dropped diagonal mass),
2656    /// giving λ_min ≈ −δ < 0 at *every* length scale — the issue's report.
2657    #[test]
2658    fn d1_hybrid_penalty_is_psd_1604() {
2659        let d = 1usize;
2660        let nullspace_order = DuchonNullspaceOrder::Linear; // p = 2
2661        let centers = fixture_centers(d, 12);
2662        let mut cache = BasisCacheContext::default();
2663        let z = kernel_constraint_nullspace(centers.view(), nullspace_order, &mut cache)
2664            .expect("constraint null space");
2665        for &power in &[2.0f64, 3.0] {
2666            for &length_scale in &[0.5f64, 1.0, 10.0, 100.0] {
2667                let omega = duchon_constrained_bending_penalty(
2668                    centers.view(),
2669                    Some(length_scale),
2670                    power,
2671                    nullspace_order,
2672                    None,
2673                    &z,
2674                )
2675                .unwrap_or_else(|e| {
2676                    panic!("d=1 p=2 s={power} ls={length_scale} penalty rejected: {e}")
2677                });
2678                let (penalty, _scale) = normalize_penalty(&omega);
2679                let lam_min = lambda_min(&penalty);
2680                assert!(
2681                    lam_min >= -1e-9,
2682                    "d=1 p=2 s={power} ls={length_scale}: λ_min={lam_min:.6e} (not PSD)"
2683                );
2684            }
2685        }
2686    }
2687
2688    /// No-regression guard: a well-conditioned low-dimensional fixture must keep
2689    /// the exact kernel VALUES the partial-fraction path produced before the
2690    /// gam#1424 fix. For d=2 the stable-integral reroute does not apply
2691    /// (`2p=4 ≥ d=2`), so `duchon_matern_kernel_general_from_distance` still runs
2692    /// the original sum verbatim; pinning it against an independent direct
2693    /// evaluation of the same partial-fraction blocks proves the production
2694    /// routing is unchanged for low `d`.
2695    #[test]
2696    fn low_dim_hybrid_kernel_values_unchanged_1424() {
2697        let d = 2usize;
2698        let p_order = 2usize; // Linear null space (m=2)
2699        let s_order = 2usize;
2700        let kappa = 1.0_f64;
2701        let length_scale = Some(1.0_f64);
2702        // The d=2 case is NOT rerouted to the stable integral.
2703        assert!(!duchon_hybrid_stable_integral_applies(p_order, s_order, d));
2704        let coeffs = duchon_partial_fraction_coeffs(p_order, s_order, kappa);
2705
2706        for &r in &[0.25_f64, 0.75, 1.5] {
2707            // Independent reference: the raw partial-fraction sum
2708            // Σ a_m·r^{2m-d}(·log) + Σ b_n·matern_block, identical in form to the
2709            // production direct-sum branch but assembled here from scratch.
2710            let mut reference = 0.0_f64;
2711            for (m, &coeff) in coeffs.a.iter().enumerate().skip(1) {
2712                if coeff != 0.0 {
2713                    reference += coeff * polyharmonic_kernel(r, m as f64, d);
2714                }
2715            }
2716            for (n, &coeff) in coeffs.b.iter().enumerate().skip(1) {
2717                if coeff != 0.0 {
2718                    reference += coeff * duchon_matern_block(r, kappa, n, d).expect("matern block");
2719                }
2720            }
2721
2722            let got = duchon_matern_kernel_general_from_distance(
2723                r,
2724                length_scale,
2725                p_order,
2726                s_order,
2727                d,
2728                Some(&coeffs),
2729            )
2730            .expect("low-d hybrid kernel value");
2731            assert!(
2732                (got - reference).abs() <= 1e-10,
2733                "low-d hybrid kernel value regressed at r={r}: got {got:.15e}, reference {reference:.15e}"
2734            );
2735        }
2736    }
2737
2738    /// #1817: a low-order/low-power Duchon config with the stiffness (D2)
2739    /// operator active — d=2, `Linear` null space (p=2), power s=0 — has
2740    /// `2(p+s)=4`, which clears the pointwise margin (>d=2) and D1 (>d+1=3) but
2741    /// NOT D2 (>d+2=4), so the collocation guard used to fire mid-fit. The order
2742    /// must now auto-raise so the mass+tension+stiffness penalty matrices build
2743    /// cleanly, and the effective order must satisfy the strict D2 margin.
2744    #[test]
2745    fn operator_penalties_auto_raise_order_issue_1817() {
2746        // 4×3 grid on [0,1]² — 12 centers, comfortably above the 6 polynomial
2747        // columns of the auto-raised Degree(2) null space, so the auto-DEGRADE
2748        // path does not interfere with the auto-RAISE under test.
2749        let mut centers = Array2::<f64>::zeros((12, 2));
2750        let mut row = 0;
2751        for i in 0..4 {
2752            for j in 0..3 {
2753                centers[[row, 0]] = i as f64 / 3.0;
2754                centers[[row, 1]] = j as f64 / 2.0;
2755                row += 1;
2756            }
2757        }
2758
2759        let dim = 2usize;
2760        let power = 0.0_f64;
2761        let requested = DuchonNullspaceOrder::Linear;
2762
2763        // The unraised config is exactly the one that trips the D2 guard.
2764        let requested_p = duchon_p_from_nullspace_order(requested);
2765        assert!(
2766            2.0 * (requested_p as f64 + power) <= dim as f64 + 2.0,
2767            "precondition: requested (p,s) must be on the failing side of the D2 margin"
2768        );
2769
2770        // Auto-raise (max_op = 2 ⇒ stiffness/D2 active) must clear the strict D2
2771        // margin 2(p+s) > d+2.
2772        let effective = duchon_order_for_operator_margin(dim, power, requested, 2);
2773        let effective_p = duchon_p_from_nullspace_order(effective);
2774        assert!(
2775            2.0 * (effective_p as f64 + power) > dim as f64 + 2.0,
2776            "auto-raised order must satisfy 2(p+s) > d+2: got 2*({}+{})={} vs d+2={}",
2777            effective_p,
2778            power,
2779            2.0 * (effective_p as f64 + power),
2780            dim as f64 + 2.0
2781        );
2782
2783        // End-to-end: the mass+tension+stiffness penalty matrices (max_op=2)
2784        // must now build without an InvalidInput from the pointwise/collocation
2785        // guard, because the order was raised before the guard could fire.
2786        let penalties = build_duchon_operator_penalty_matrices(
2787            centers.view(),
2788            None,
2789            None, // pure (scale-free) Duchon — the guarded branch
2790            power,
2791            requested,
2792            None,
2793            None,
2794        )
2795        .expect("Duchon mass+tension+stiffness penalties must build after auto-raise (#1817)");
2796        for m in [&penalties.mass, &penalties.tension, &penalties.stiffness] {
2797            assert!(
2798                m.iter().all(|v| v.is_finite()),
2799                "auto-raised operator penalty matrices must be finite"
2800            );
2801        }
2802    }
2803}