gam_solve/
rho_prior_eval.rs

1//! Shared evaluation of the configured deterministic smoothing-parameter (ρ)
2//! prior objective.
3//!
4//! The cost / gradient / Hessian of the configured [`RhoPrior`] is the same
5//! math whether it is consumed by the custom-family outer objective or by the
6//! REML/LAML runtime; only the *invalid-prior policy* differs between callers.
7//! Custom-family handling surfaces malformed priors as hard errors, while the
8//! REML runtime folds them into the objective as `+inf` cost (and `NaN`
9//! gradient/Hessian) so the outer optimizer steps away from the offending
10//! region. This module owns the single source of truth for the prior math and
11//! lets each caller pick its policy explicitly via [`InvalidPriorPolicy`].
12//!
13//! For a single coordinate `r` (a log-precision, `λ = exp(r)`):
14//!
15//! * `Flat`           — cost 0, grad 0, Hessian 0.
16//! * `Normal{mean,sd}`— with `inv_var = 1 / sd²`,
17//!   cost `0.5 · (r − mean)² · inv_var`, grad `(r − mean) · inv_var`,
18//!   Hessian `inv_var`.
19//! * `GammaPrecision{shape,rate}` — Gamma(shape, rate) hyperprior on the
20//!   precision `λ = exp(r)`, using the REML/LAML MAP-in-lambda convention and
21//!   contributing (up to an additive constant) cost `rate · λ − (shape − 1) · r`,
22//!   grad `rate · λ − (shape − 1)`, Hessian `rate · λ`. Samplers over `r`
23//!   include the `+r` Jacobian and therefore use the transformed density instead.
24//! * `Independent`    — one prior per coordinate; the same per-coordinate math
25//!   summed/stacked, with nested `Independent` priors rejected as invalid.
26//!
27//! The Hessian is diagonal and is returned as `Some` only when at least one
28//! diagonal entry is non-zero; an all-zero curvature contribution collapses to
29//! `None` so callers can skip adding an empty block.
30
31use gam_spec::RhoPrior;
32use ndarray::{Array1, Array2};
33
34/// Calibrated exponential rate `θ = −ln(tail_prob) / upper` of a
35/// penalized-complexity prior, from the tail statement `P(d > upper) =
36/// tail_prob` on the distance scale `d = exp(-ρ/2)`. Caller must validate the
37/// hyperparameters first; with `0 < tail_prob < 1` and `upper > 0` the result
38/// is finite and strictly positive.
39pub(crate) fn pc_prior_rate(upper: f64, tail_prob: f64) -> f64 {
40    -tail_prob.ln() / upper
41}
42
43/// Negative-log penalized-complexity prior contribution and its first/second
44/// derivatives in the log-precision `r`, for calibrated rate `θ`. The objective
45/// is minimized, so this returns the cost (up to the ρ-independent additive
46/// constant `−ln(θ/2)`), gradient and Hessian:
47///
48/// ```text
49/// cost = r/2 + θ exp(-r/2),  grad = 1/2 − (θ/2) exp(-r/2),  hess = (θ/4) exp(-r/2).
50/// ```
51///
52/// The curvature is strictly positive (`θ > 0`), so the contribution is convex
53/// and always supplies usable outer-Hessian information.
54pub(crate) fn pc_prior_terms(theta: f64, r: f64) -> (f64, f64, f64) {
55    let e = (-0.5 * r).exp();
56    (0.5 * r + theta * e, 0.5 - 0.5 * theta * e, 0.25 * theta * e)
57}
58
59/// Self-gated, *one-sided* penalized-complexity barrier used as the firth-general
60/// DEFAULT outer ρ prior on smoothing coordinates the caller left unset. It walls
61/// off the `λ → 0` / `ρ → −∞` under-smoothing degeneracy WITHOUT perturbing a
62/// well-identified `λ` — restoring the strict zero-downside guarantee (a clean
63/// fit stays byte-identical to plain REML), exactly the way the Jeffreys
64/// conditioning gate returns a byte-identical-zero contribution on a clean fit.
65///
66/// MOTIVATION. The plain PC prior contributes `cost = ρ/2 + θ e^{-ρ/2}` whose
67/// gradient `1/2 − (θ/2) e^{-ρ/2}` tends to the CONSTANT `+1/2` as `ρ → +∞`. That
68/// residual `O(1)` Occam pull shifts the REML optimum of EVERY identified `λ` by
69/// `Δρ ≈ (1/2)/H_reml = O(1/n)` — small, but never byte-zero, so it is a
70/// (tiny) downside on every fit including well-conditioned ones. The firth
71/// default exists only to remove the `λ → 0` degeneracy, not to bias an
72/// identified `λ`, so the default should be EXACTLY flat on the identified side.
73///
74/// SHAPE. Work on the distance scale `b(ρ) = e^{-ρ/2}` (the marginal-SD scale of
75/// the penalized component; `b → ∞` is the under-smoothing degeneracy). Gate at
76/// `ρ_gate` with `b_gate = e^{-ρ_gate/2}`:
77///   * `ρ ≥ ρ_gate` (identified side, distance `b ≤ b_gate`): cost = grad = hess
78///     = 0, BYTE-IDENTICALLY (a clean fit pays nothing — strict zero-downside).
79///   * `ρ < ρ_gate` (degenerate side, distance `b > b_gate`): a convex barrier
80///     that is C¹ at the gate,
81///       cost = θ [ b(ρ) − b_gate − (1/2) b_gate (ρ_gate − ρ) ],
82///       grad = θ [ −(1/2) b(ρ) + (1/2) b_gate ]   (= 0 at the gate, < 0 below),
83///       hess = (θ/4) b(ρ)  (> 0, always usable curvature).
84///     The gradient is negative below the gate (pushing `ρ` UP, away from the
85///     `λ → 0` wall) and decays continuously to zero AT the gate, so there is no
86///     persistent pull on the identified side.
87///
88/// GATE CHOICE. `ρ_gate = −2 ln(upper)`, i.e. the barrier engages only once the
89/// marginal-SD distance `b = e^{-ρ/2}` exceeds the SAME interpretable `upper`
90/// bound that calibrates `θ` (`P(b > upper) = tail_prob`). Any `λ` whose
91/// distance scale is within the plausible identified range `b ≤ upper` is left
92/// exactly untouched; the wall only asserts itself in the genuinely
93/// under-smoothed tail the prior was introduced to bound.
94pub(crate) fn firth_default_barrier_terms(theta: f64, upper: f64, r: f64) -> (f64, f64, f64) {
95    let rho_gate = -2.0 * upper.ln();
96    if r >= rho_gate {
97        // Identified side: byte-identically flat (strict zero-downside).
98        return (0.0, 0.0, 0.0);
99    }
100    let b = (-0.5 * r).exp();
101    let b_gate = (-0.5 * rho_gate).exp(); // == upper, but kept symbolic for clarity.
102    let cost = theta * (b - b_gate - 0.5 * b_gate * (rho_gate - r));
103    let grad = theta * (-0.5 * b + 0.5 * b_gate);
104    let hess = 0.25 * theta * b;
105    (cost, grad, hess)
106}
107
108/// What a caller wants done when the configured prior is malformed (e.g. a
109/// `Normal` with non-positive `sd`, a `GammaPrecision` with non-positive
110/// `shape`, an `Independent` whose length disagrees with `ρ`, or a nested
111/// `Independent`).
112#[derive(Debug, Clone, Copy, PartialEq, Eq)]
113pub enum InvalidPriorPolicy {
114    /// Return a descriptive [`RhoPriorError`]. Used by custom-family handling,
115    /// which validates configuration up front and reports problems precisely.
116    HardError,
117    /// Saturate the contribution so the outer optimizer is repelled: cost is
118    /// `+inf` and gradient/Hessian entries are `NaN`. Used by the REML/LAML
119    /// runtime, where the prior is one additive term inside a search.
120    Saturate,
121}
122
123/// Structured malformed-prior diagnostic produced under
124/// [`InvalidPriorPolicy::HardError`].
125#[derive(Debug, Clone, PartialEq, Eq)]
126pub enum RhoPriorError {
127    /// A length / shape disagreement (e.g. an `Independent` prior whose number
128    /// of coordinates does not match `ρ`).
129    DimensionMismatch { reason: String },
130    /// A structurally invalid prior, such as a nested `Independent` where a
131    /// scalar prior is required.
132    ConstraintViolation { reason: String },
133}
134
135impl RhoPriorError {
136    pub(crate) fn dimension_mismatch(reason: String) -> Self {
137        RhoPriorError::DimensionMismatch { reason }
138    }
139
140    pub(crate) fn constraint_violation(reason: String) -> Self {
141        RhoPriorError::ConstraintViolation { reason }
142    }
143}
144
145/// Cost, gradient, and (optional) diagonal Hessian of the configured ρ prior.
146#[derive(Debug, Clone)]
147pub struct RhoPriorEval {
148    pub cost: f64,
149    pub gradient: Array1<f64>,
150    /// Diagonal Hessian, `Some` only when at least one entry is non-zero.
151    pub hessian: Option<Array2<f64>>,
152}
153
154/// Per-coordinate scalar contribution `(cost, grad, hess)` of one *scalar*
155/// prior at `r`, or a structured error when the scalar prior is malformed.
156/// Nested `Independent` priors are not scalar and are rejected here.
157pub(crate) fn scalar_terms(
158    prior: &RhoPrior,
159    r: f64,
160    context: &str,
161) -> Result<(f64, f64, f64), RhoPriorError> {
162    match prior {
163        RhoPrior::Flat => Ok((0.0, 0.0, 0.0)),
164        RhoPrior::Normal { mean, sd } => {
165            if !mean.is_finite() || !sd.is_finite() || *sd <= 0.0 {
166                return Err(RhoPriorError::constraint_violation(format!(
167                    "{context} Normal log-precision prior requires finite mean and sd > 0"
168                )));
169            }
170            let inv_var = 1.0 / (*sd * *sd);
171            let delta = r - *mean;
172            Ok((0.5 * delta * delta * inv_var, delta * inv_var, inv_var))
173        }
174        RhoPrior::GammaPrecision { shape, rate } => {
175            if !shape.is_finite() || *shape <= 0.0 || !rate.is_finite() || *rate < 0.0 {
176                return Err(RhoPriorError::constraint_violation(format!(
177                    "{context} Gamma precision prior requires shape > 0 and rate >= 0"
178                )));
179            }
180            let lambda = r.exp();
181            // Deterministic REML/LAML uses the MAP-in-lambda convention; rho samplers add the Jacobian.
182            Ok((
183                *rate * lambda - (*shape - 1.0) * r,
184                *rate * lambda - (*shape - 1.0),
185                *rate * lambda,
186            ))
187        }
188        RhoPrior::PenalizedComplexity { upper, tail_prob } => {
189            if !upper.is_finite() || *upper <= 0.0 {
190                return Err(RhoPriorError::constraint_violation(format!(
191                    "{context} penalized-complexity prior requires a finite upper > 0"
192                )));
193            }
194            if !tail_prob.is_finite() || *tail_prob <= 0.0 || *tail_prob >= 1.0 {
195                return Err(RhoPriorError::constraint_violation(format!(
196                    "{context} penalized-complexity prior requires tail probability in (0, 1)"
197                )));
198            }
199            let theta = pc_prior_rate(*upper, *tail_prob);
200            Ok(pc_prior_terms(theta, r))
201        }
202        RhoPrior::Independent(_) => Err(RhoPriorError::constraint_violation(format!(
203            "{context} must be a scalar rho prior, not a nested Independent prior"
204        ))),
205    }
206}
207
208/// Saturated contribution returned (under [`InvalidPriorPolicy::Saturate`])
209/// when the prior is malformed: `+inf` cost, `NaN` gradient, `NaN` Hessian.
210pub(crate) fn saturated(len: usize) -> RhoPriorEval {
211    RhoPriorEval {
212        cost: f64::INFINITY,
213        gradient: Array1::from_elem(len, f64::NAN),
214        hessian: Some(Array2::from_elem((len, len), f64::NAN)),
215    }
216}
217
218/// Evaluate the configured ρ prior under the given invalid-prior `policy`.
219///
220/// On success returns the prior `cost`, `gradient`, and diagonal `hessian`
221/// (the latter `Some` only when non-zero). Under [`InvalidPriorPolicy::Saturate`]
222/// a malformed prior yields the saturated `+inf`/`NaN` contribution and never
223/// errors; under [`InvalidPriorPolicy::HardError`] it returns the structured
224/// [`RhoPriorError`].
225pub fn evaluate(
226    prior: &RhoPrior,
227    rho: &Array1<f64>,
228    policy: InvalidPriorPolicy,
229) -> Result<RhoPriorEval, RhoPriorError> {
230    match evaluate_strict(prior, rho) {
231        Ok(eval) => Ok(eval),
232        Err(err) => match policy {
233            InvalidPriorPolicy::HardError => Err(err),
234            InvalidPriorPolicy::Saturate => Ok(saturated(rho.len())),
235        },
236    }
237}
238
239/// Strict evaluation: always errors on a malformed prior. Policy mapping lives
240/// in [`evaluate`].
241pub fn evaluate_strict(
242    prior: &RhoPrior,
243    rho: &Array1<f64>,
244) -> Result<RhoPriorEval, RhoPriorError> {
245    let len = rho.len();
246    match prior {
247        RhoPrior::Flat => Ok(RhoPriorEval {
248            cost: 0.0,
249            gradient: Array1::zeros(len),
250            hessian: None,
251        }),
252        RhoPrior::Normal { .. }
253        | RhoPrior::GammaPrecision { .. }
254        | RhoPrior::PenalizedComplexity { .. } => {
255            let mut cost = 0.0;
256            let mut gradient = Array1::<f64>::zeros(len);
257            let mut hessian = Array2::<f64>::zeros((len, len));
258            let mut any_hessian = false;
259            for (idx, &r) in rho.iter().enumerate() {
260                let (c, g, h) = scalar_terms(prior, r, "rho prior")?;
261                cost += c;
262                gradient[idx] = g;
263                hessian[[idx, idx]] = h;
264                any_hessian |= h != 0.0;
265            }
266            Ok(RhoPriorEval {
267                cost,
268                gradient,
269                hessian: any_hessian.then_some(hessian),
270            })
271        }
272        RhoPrior::Independent(priors) => {
273            if priors.len() != len {
274                return Err(RhoPriorError::dimension_mismatch(format!(
275                    "Independent rho prior length mismatch: got {}, expected {}",
276                    priors.len(),
277                    len
278                )));
279            }
280            let mut cost = 0.0;
281            let mut gradient = Array1::<f64>::zeros(len);
282            let mut hessian = Array2::<f64>::zeros((len, len));
283            let mut any_hessian = false;
284            for (idx, (prior, &r)) in priors.iter().zip(rho.iter()).enumerate() {
285                let (c, g, h) = scalar_terms(prior, r, &format!("rho prior coordinate {idx}"))?;
286                cost += c;
287                gradient[idx] = g;
288                hessian[[idx, idx]] = h;
289                any_hessian |= h != 0.0;
290            }
291            Ok(RhoPriorEval {
292                cost,
293                gradient,
294                hessian: any_hessian.then_some(hessian),
295            })
296        }
297    }
298}
299
300#[cfg(test)]
301mod tests {
302    use super::*;
303
304    pub(crate) fn approx(a: f64, b: f64) {
305        assert!((a - b).abs() <= 1e-12, "expected {a} ~= {b}");
306    }
307
308    /// The shared engine and the saturating policy must agree term-for-term on
309    /// every valid prior variant — this is the parity guarantee the two former
310    /// duplicate call sites relied on.
311    #[test]
312    pub(crate) fn cost_grad_hess_parity_across_valid_priors() {
313        let rho = Array1::from_vec(vec![-0.5, 0.25, 1.5, 0.7]);
314        let priors = vec![
315            RhoPrior::Flat,
316            RhoPrior::Normal { mean: 0.2, sd: 0.8 },
317            RhoPrior::GammaPrecision {
318                shape: 2.0,
319                rate: 0.5,
320            },
321            RhoPrior::PenalizedComplexity {
322                upper: 0.5,
323                tail_prob: 0.05,
324            },
325            RhoPrior::Independent(vec![
326                RhoPrior::Flat,
327                RhoPrior::Normal {
328                    mean: -0.1,
329                    sd: 1.3,
330                },
331                RhoPrior::GammaPrecision {
332                    shape: 1.5,
333                    rate: 0.0,
334                },
335                RhoPrior::PenalizedComplexity {
336                    upper: 1.2,
337                    tail_prob: 0.01,
338                },
339            ]),
340        ];
341        for prior in &priors {
342            // Both policies must produce identical results when the prior is
343            // valid: the policy only changes the malformed branch.
344            let hard = evaluate(prior, &rho, InvalidPriorPolicy::HardError)
345                .expect("valid prior must not error under HardError");
346            let sat =
347                evaluate(prior, &rho, InvalidPriorPolicy::Saturate).expect("Saturate never errors");
348            approx(hard.cost, sat.cost);
349            assert_eq!(hard.gradient, sat.gradient);
350            assert_eq!(hard.hessian, sat.hessian);
351
352            // Finite-difference check of gradient and (diagonal) Hessian. The
353            // gradient uses a small step; the Hessian needs a larger one
354            // because a central second difference amplifies roundoff like
355            // macheps/h² — the optimal step is ≈ macheps^¼ ≈ 1e-4.
356            let base = evaluate(prior, &rho, InvalidPriorPolicy::HardError).unwrap();
357            let cost_at = |k: usize, delta: f64| -> f64 {
358                let mut r = rho.clone();
359                r[k] += delta;
360                evaluate(prior, &r, InvalidPriorPolicy::HardError)
361                    .unwrap()
362                    .cost
363            };
364            let (h_grad, h_hess) = (1e-6, 1e-4);
365            for k in 0..rho.len() {
366                let fd_grad = (cost_at(k, h_grad) - cost_at(k, -h_grad)) / (2.0 * h_grad);
367                assert!(
368                    (fd_grad - base.gradient[k]).abs() <= 1e-5,
369                    "gradient mismatch at {k}: fd {fd_grad} vs {}",
370                    base.gradient[k]
371                );
372                let fd_hess = (cost_at(k, h_hess) - 2.0 * base.cost + cost_at(k, -h_hess))
373                    / (h_hess * h_hess);
374                let analytic_hess = base.hessian.as_ref().map_or(0.0, |h| h[[k, k]]);
375                assert!(
376                    (fd_hess - analytic_hess).abs() <= 1e-4,
377                    "hessian mismatch at {k}: fd {fd_hess} vs {analytic_hess}"
378                );
379            }
380        }
381    }
382
383    #[test]
384    pub(crate) fn invalid_prior_policy_branches() {
385        let rho = Array1::from_vec(vec![0.0, 0.0]);
386        let bad_normal = RhoPrior::Normal {
387            mean: 0.0,
388            sd: -1.0,
389        };
390        // HardError surfaces a structured error.
391        assert!(matches!(
392            evaluate(&bad_normal, &rho, InvalidPriorPolicy::HardError),
393            Err(RhoPriorError::ConstraintViolation { .. })
394        ));
395        // Saturate folds it into +inf / NaN.
396        let sat = evaluate(&bad_normal, &rho, InvalidPriorPolicy::Saturate).unwrap();
397        assert!(sat.cost.is_infinite() && sat.cost > 0.0);
398        assert!(sat.gradient.iter().all(|v| v.is_nan()));
399        assert!(sat.hessian.unwrap().iter().all(|v| v.is_nan()));
400
401        // Length-mismatched Independent.
402        let bad_len = RhoPrior::Independent(vec![RhoPrior::Flat]);
403        assert!(matches!(
404            evaluate(&bad_len, &rho, InvalidPriorPolicy::HardError),
405            Err(RhoPriorError::DimensionMismatch { .. })
406        ));
407        // Nested Independent.
408        let nested = RhoPrior::Independent(vec![
409            RhoPrior::Independent(vec![RhoPrior::Flat]),
410            RhoPrior::Flat,
411        ]);
412        assert!(matches!(
413            evaluate(&nested, &rho, InvalidPriorPolicy::HardError),
414            Err(RhoPriorError::ConstraintViolation { .. })
415        ));
416    }
417
418    // ---- Penalized-complexity prior ---------------------------------------
419
420    /// Normalized PC-prior log-density on ρ (includes the additive constant
421    /// `ln(θ/2)` the optimizer cost drops). `log p(ρ) = ln(θ/2) − ρ/2 − θ
422    /// exp(−ρ/2)`, the change-of-variables image of `d ~ Exp(θ)` under
423    /// `d = exp(−ρ/2)`.
424    pub(crate) fn pc_log_pdf(upper: f64, tail_prob: f64, r: f64) -> f64 {
425        let theta = pc_prior_rate(upper, tail_prob);
426        (0.5 * theta).ln() - 0.5 * r - theta * (-0.5 * r).exp()
427    }
428
429    #[test]
430    pub(crate) fn pc_rate_calibrates_to_tail_statement() {
431        // θ = −ln(α)/U solves P(d > U) = exp(−θU) = α exactly.
432        for &(upper, alpha) in &[(0.5_f64, 0.05_f64), (1.2, 0.01), (3.0, 0.25)] {
433            let theta = pc_prior_rate(upper, alpha);
434            let tail = (-theta * upper).exp();
435            assert!(
436                (tail - alpha).abs() < 1e-12,
437                "P(d>U)={tail} vs α={alpha} (U={upper})"
438            );
439        }
440    }
441
442    #[test]
443    pub(crate) fn pc_density_integrates_to_one_and_matches_tail() {
444        // Trapezoidal integration of the normalized ρ-density. d = exp(−ρ/2)
445        // ranges over (0, ∞); the density decays at both ends, so a wide grid
446        // captures essentially all the mass.
447        let upper = 0.5_f64;
448        let alpha = 0.05_f64;
449        let (lo, hi, n) = (-60.0_f64, 80.0_f64, 2_000_000usize);
450        let h = (hi - lo) / n as f64;
451        // ρ < −2 ln U  ⇔  exp(−ρ/2) > U  ⇔  d > U: the tail region.
452        let tail_boundary = -2.0 * upper.ln();
453        let mut total = 0.0;
454        let mut tail = 0.0;
455        for i in 0..=n {
456            let r = lo + i as f64 * h;
457            let w = if i == 0 || i == n { 0.5 } else { 1.0 };
458            let p = pc_log_pdf(upper, alpha, r).exp();
459            total += w * p;
460            if r <= tail_boundary {
461                tail += w * p;
462            }
463        }
464        total *= h;
465        tail *= h;
466        assert!((total - 1.0).abs() < 1e-4, "∫ p(ρ) dρ = {total}");
467        // P(d > U) recovered from the ρ-density must equal the calibration α.
468        assert!(
469            (tail - alpha).abs() < 1e-3,
470            "P(d>U) = {tail} vs α = {alpha}"
471        );
472    }
473
474    #[test]
475    pub(crate) fn pc_terms_are_negative_log_density_derivatives() {
476        // cost = −log p (up to the dropped constant); grad/hess are its ρ
477        // derivatives, cross-checked against a finite difference of pc_log_pdf.
478        let (upper, alpha) = (0.8_f64, 0.02_f64);
479        let theta = pc_prior_rate(upper, alpha);
480        // Small step for the first derivative; larger step for the second,
481        // whose central difference amplifies roundoff like macheps/h².
482        let (h1, h2) = (1e-6, 1e-4);
483        for &r in &[-2.0_f64, -0.3, 0.0, 1.7, 4.0] {
484            let (cost, grad, hess) = pc_prior_terms(theta, r);
485            // cost + log p(ρ) is the ρ-independent constant ln(θ/2).
486            approx(cost + pc_log_pdf(upper, alpha, r), (0.5 * theta).ln());
487            // grad = −d/dρ log p  (FD on the log-density).
488            let dlp =
489                (pc_log_pdf(upper, alpha, r + h1) - pc_log_pdf(upper, alpha, r - h1)) / (2.0 * h1);
490            let neg_dlp = -dlp;
491            assert!(
492                (grad - neg_dlp).abs() < 1e-5,
493                "grad {grad} vs {neg_dlp} at r={r}"
494            );
495            // hess = −d²/dρ² log p (FD), and is strictly positive (convex).
496            let d2lp = (pc_log_pdf(upper, alpha, r + h2) - 2.0 * pc_log_pdf(upper, alpha, r)
497                + pc_log_pdf(upper, alpha, r - h2))
498                / (h2 * h2);
499            let neg_d2lp = -d2lp;
500            assert!(
501                (hess - neg_d2lp).abs() < 1e-4,
502                "hess {hess} vs {neg_d2lp} at r={r}"
503            );
504            assert!(hess > 0.0, "PC curvature must be positive, got {hess}");
505        }
506    }
507
508    #[test]
509    pub(crate) fn pc_prior_pulls_toward_simpler_model() {
510        // The simpler (base) model is more smoothing: larger ρ ⇒ larger
511        // precision λ = exp(ρ) ⇒ the penalized component collapses. The PC cost
512        // must therefore make *under*-smoothing (small ρ, wiggly) more expensive
513        // than over-smoothing of the same magnitude — an asymmetric, convex
514        // bowl whose gradient at the base side is bounded by 1/2.
515        let prior = RhoPrior::PenalizedComplexity {
516            upper: 1.0,
517            tail_prob: 0.05,
518        };
519        let cost = |r: f64| {
520            evaluate(
521                &prior,
522                &Array1::from_vec(vec![r]),
523                InvalidPriorPolicy::HardError,
524            )
525            .unwrap()
526            .cost
527        };
528        // Wiggly (ρ = −4) is penalized far more than smooth (ρ = +4).
529        assert!(
530            cost(-4.0) > cost(4.0),
531            "under-smoothing must cost more: {} vs {}",
532            cost(-4.0),
533            cost(4.0)
534        );
535        // As ρ → +∞ the cost grows only linearly (slope 1/2): a gentle pull, not
536        // a wall — the data can still buy complexity.
537        let g_far = evaluate(
538            &prior,
539            &Array1::from_vec(vec![25.0]),
540            InvalidPriorPolicy::HardError,
541        )
542        .unwrap()
543        .gradient[0];
544        assert!(
545            (g_far - 0.5).abs() < 1e-3,
546            "far over-smoothing slope {g_far}"
547        );
548    }
549
550    #[test]
551    pub(crate) fn pc_prior_rejects_invalid_hyperparameters() {
552        let rho = Array1::from_vec(vec![0.0]);
553        for bad in [
554            RhoPrior::PenalizedComplexity {
555                upper: 0.0,
556                tail_prob: 0.05,
557            },
558            RhoPrior::PenalizedComplexity {
559                upper: -1.0,
560                tail_prob: 0.05,
561            },
562            RhoPrior::PenalizedComplexity {
563                upper: 1.0,
564                tail_prob: 0.0,
565            },
566            RhoPrior::PenalizedComplexity {
567                upper: 1.0,
568                tail_prob: 1.0,
569            },
570            RhoPrior::PenalizedComplexity {
571                upper: 1.0,
572                tail_prob: f64::NAN,
573            },
574        ] {
575            assert!(matches!(
576                evaluate(&bad, &rho, InvalidPriorPolicy::HardError),
577                Err(RhoPriorError::ConstraintViolation { .. })
578            ));
579            let sat = evaluate(&bad, &rho, InvalidPriorPolicy::Saturate).unwrap();
580            assert!(sat.cost.is_infinite() && sat.cost > 0.0);
581        }
582    }
583}
gam_solve/rho_prior_eval.rs

gam_solve/
rho_prior_eval.rs