gam_spec/lib.rs
1use ndarray::{Array1, ArrayView1};
2use serde::{Deserialize, Serialize};
3
4/// Hyperprior placed on a coefficient group's precision / log-precision.
5#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
6pub enum CoefficientGroupPrior {
7 Flat,
8 NormalLogPrecision {
9 mean: f64,
10 sd: f64,
11 },
12 GammaPrecision {
13 shape: f64,
14 rate: f64,
15 },
16 /// Penalized-complexity prior calibrated by `P(exp(-rho/2) > upper) =
17 /// tail_prob`; see [`RhoPrior::PenalizedComplexity`].
18 PenalizedComplexity {
19 upper: f64,
20 tail_prob: f64,
21 },
22}
23
24impl CoefficientGroupPrior {
25 pub fn to_rho_prior(&self) -> RhoPrior {
26 match *self {
27 Self::Flat => RhoPrior::Flat,
28 Self::NormalLogPrecision { mean, sd } => RhoPrior::Normal { mean, sd },
29 Self::GammaPrecision { shape, rate } => RhoPrior::GammaPrecision { shape, rate },
30 Self::PenalizedComplexity { upper, tail_prob } => {
31 RhoPrior::PenalizedComplexity { upper, tail_prob }
32 }
33 }
34 }
35
36 pub fn validate(&self, context: &str) -> Result<(), String> {
37 match *self {
38 Self::Flat => Ok(()),
39 Self::NormalLogPrecision { mean, sd } => {
40 if !mean.is_finite() {
41 return Err(format!(
42 "{context} Normal log-precision prior requires finite mean, got {mean}"
43 ));
44 }
45 if !sd.is_finite() || sd <= 0.0 {
46 return Err(format!(
47 "{context} Normal log-precision prior requires sd > 0, got {sd}"
48 ));
49 }
50 Ok(())
51 }
52 Self::GammaPrecision { shape, rate } => {
53 if !shape.is_finite() || shape <= 0.0 {
54 return Err(format!(
55 "{context} Gamma precision prior requires shape > 0, got {shape}"
56 ));
57 }
58 if !rate.is_finite() || rate < 0.0 {
59 return Err(format!(
60 "{context} Gamma precision prior requires rate >= 0, got {rate}"
61 ));
62 }
63 Ok(())
64 }
65 Self::PenalizedComplexity { upper, tail_prob } => {
66 if !upper.is_finite() || upper <= 0.0 {
67 return Err(format!(
68 "{context} penalized-complexity prior requires upper > 0, got {upper}"
69 ));
70 }
71 if !tail_prob.is_finite() || tail_prob <= 0.0 || tail_prob >= 1.0 {
72 return Err(format!(
73 "{context} penalized-complexity prior requires tail probability in (0, 1), got {tail_prob}"
74 ));
75 }
76 Ok(())
77 }
78 }
79 }
80}
81
82/// Shared default for monotone wiggle/deviation blocks. Formula DSL defaults,
83/// workflow configs, and runtime deviation blocks should all derive from this
84/// type so reproducible presets do not drift across layers.
85#[derive(Clone, Debug, Serialize, Deserialize)]
86pub struct WigglePenaltyConfig {
87 pub degree: usize,
88 pub num_internal_knots: usize,
89 pub penalty_orders: Vec<usize>,
90 pub double_penalty: bool,
91 pub monotonicity_eps: f64,
92}
93
94impl WigglePenaltyConfig {
95 pub fn cubic_triple_operator_default() -> Self {
96 Self {
97 degree: 3,
98 num_internal_knots: 8,
99 penalty_orders: vec![1, 2, 3],
100 double_penalty: true,
101 monotonicity_eps: 1e-4,
102 }
103 }
104}
105
106/// Shared engine-level link selector for generalized models. This is the
107/// "wide" link descriptor: CLI parsing, formula DSL, and the projection from
108/// `InverseLink::link_function()` all live in this enum, so it carries every
109/// link kind the engine knows about — including the state-bearing
110/// `Sas` / `BetaLogistic` cases.
111///
112/// `LinkFunction` is *not* the right type for the state-less `InverseLink::Standard`
113/// cell. Use [`StandardLink`] there: the type system then refuses to construct
114/// a state-less `Standard(Sas)` / `Standard(BetaLogistic)` placeholder.
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
116pub enum LinkFunction {
117 Logit,
118 Probit,
119 CLogLog,
120 Sas,
121 BetaLogistic,
122 Identity,
123 Log,
124}
125
126impl LinkFunction {
127 #[inline]
128 pub const fn name(self) -> &'static str {
129 match self {
130 Self::Logit => "logit",
131 Self::Probit => "probit",
132 Self::CLogLog => "cloglog",
133 Self::Sas => "sas",
134 Self::BetaLogistic => "beta-logistic",
135 Self::Identity => "identity",
136 Self::Log => "log",
137 }
138 }
139}
140
141/// Legal-only link descriptor for the state-less `InverseLink::Standard` cell.
142///
143/// `Sas` / `BetaLogistic` are state-bearing and live in their own
144/// `InverseLink::Sas(_)` / `InverseLink::BetaLogistic(_)` variants. The type
145/// system enforces that fact by omitting them here, so the historical
146/// "state-less placeholder" pattern (`InverseLink::Standard(LinkFunction::Sas)`)
147/// no longer compiles.
148#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
149pub enum StandardLink {
150 Logit,
151 Probit,
152 CLogLog,
153 Identity,
154 Log,
155}
156
157impl StandardLink {
158 #[inline]
159 pub const fn name(self) -> &'static str {
160 self.as_link_function().name()
161 }
162
163 #[inline]
164 pub const fn as_link_function(self) -> LinkFunction {
165 match self {
166 Self::Logit => LinkFunction::Logit,
167 Self::Probit => LinkFunction::Probit,
168 Self::CLogLog => LinkFunction::CLogLog,
169 Self::Identity => LinkFunction::Identity,
170 Self::Log => LinkFunction::Log,
171 }
172 }
173}
174
175impl From<StandardLink> for LinkFunction {
176 #[inline]
177 fn from(link: StandardLink) -> Self {
178 link.as_link_function()
179 }
180}
181
182/// Error returned when narrowing a wide [`LinkFunction`] into a [`StandardLink`].
183/// `Sas` and `BetaLogistic` are state-bearing and have no legal `Standard(_)`
184/// representation; they must be routed through `InverseLink::Sas` /
185/// `InverseLink::BetaLogistic`.
186#[derive(Debug, Clone, Copy, PartialEq, Eq)]
187pub struct StateBearingLinkInStandardSlot(pub LinkFunction);
188
189impl std::fmt::Display for StateBearingLinkInStandardSlot {
190 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
191 write!(
192 f,
193 "state-bearing link `{}` cannot be carried by `InverseLink::Standard`; \
194 route through `InverseLink::Sas` / `InverseLink::BetaLogistic`",
195 self.0.name()
196 )
197 }
198}
199
200impl std::error::Error for StateBearingLinkInStandardSlot {}
201
202impl TryFrom<LinkFunction> for StandardLink {
203 type Error = StateBearingLinkInStandardSlot;
204
205 #[inline]
206 fn try_from(link: LinkFunction) -> Result<Self, Self::Error> {
207 match link {
208 LinkFunction::Logit => Ok(Self::Logit),
209 LinkFunction::Probit => Ok(Self::Probit),
210 LinkFunction::CLogLog => Ok(Self::CLogLog),
211 LinkFunction::Identity => Ok(Self::Identity),
212 LinkFunction::Log => Ok(Self::Log),
213 LinkFunction::Sas | LinkFunction::BetaLogistic => {
214 Err(StateBearingLinkInStandardSlot(link))
215 }
216 }
217 }
218}
219
220/// Supported inverse-link components for convex blended inverse links.
221#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
222pub enum LinkComponent {
223 Probit,
224 Logit,
225 CLogLog,
226 LogLog,
227 Cauchit,
228}
229
230impl LinkComponent {
231 #[inline]
232 pub const fn name(self) -> &'static str {
233 match self {
234 Self::Probit => "probit",
235 Self::Logit => "logit",
236 Self::CLogLog => "cloglog",
237 Self::LogLog => "loglog",
238 Self::Cauchit => "cauchit",
239 }
240 }
241}
242
243/// User-facing configuration for a blended inverse link.
244#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
245pub struct MixtureLinkSpec {
246 pub components: Vec<LinkComponent>,
247 /// Free logits for components [0..K-2]. The final component logit is fixed at 0.
248 pub initial_rho: Array1<f64>,
249}
250
251/// Runtime blended-link state with precomputed softmax weights.
252#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
253pub struct MixtureLinkState {
254 pub components: Vec<LinkComponent>,
255 /// Free logits for components [0..K-2]. The final component logit is fixed at 0.
256 pub rho: Array1<f64>,
257 /// Softmax-normalized component weights (length K).
258 pub pi: Array1<f64>,
259}
260
261/// User-facing configuration for the continuous sinh-arcsinh inverse link.
262#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
263pub struct SasLinkSpec {
264 pub initial_epsilon: f64,
265 pub initial_log_delta: f64,
266}
267
268/// Runtime state shared by the two-parameter `Sas` and `BetaLogistic` links:
269/// an `epsilon` skew/asymmetry term plus a raw log-scale parameter (`log_delta`)
270/// and its derived positive companion (`delta`). The `delta` field's meaning is
271/// link-specific — see its doc — so derivative kernels must consume `log_delta`,
272/// never `delta`.
273#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
274pub struct SasLinkState {
275 pub epsilon: f64,
276 /// Raw optimization parameter. For `Sas` this is the pre-bound log-tail; for
277 /// `BetaLogistic` it is the unconstrained log geometric-mean beta shape (the
278 /// `log_shape_center` the beta-logistic kernels expect).
279 pub log_delta: f64,
280 /// Derived positive companion of `log_delta`. Its meaning depends on the link:
281 /// - `Sas`: effective tail parameter `delta = exp(B * tanh(log_delta / B))`,
282 /// `B = SAS_LOG_DELTA_BOUND`.
283 /// - `BetaLogistic`: geometric-mean beta shape `exp(log_delta) = sqrt(a*b)`.
284 /// The beta-logistic derivative kernels take `log_delta` (the log center), so
285 /// passing this exponentiated `delta` to them would be off by an `exp`.
286 pub delta: f64,
287}
288
289/// Fixed latent Gaussian scale for the exact marginal cloglog family.
290#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
291pub struct LatentCLogLogState {
292 pub latent_sd: f64,
293}
294
295impl LatentCLogLogState {
296 #[inline]
297 pub fn new(latent_sd: f64) -> Result<Self, String> {
298 if !latent_sd.is_finite() || latent_sd < 0.0 {
299 return Err(format!(
300 "latent cloglog standard deviation must be finite and >= 0, got {latent_sd}"
301 ));
302 }
303 Ok(Self { latent_sd })
304 }
305}
306
307/// Whether the inverse link exposes the Fisher-weight jet that the Firth
308/// penalty's higher-order correction consumes. Inlined here (issue #1521) so
309/// `LikelihoodSpec::supports_firth` has no upward dependency on
310/// `solver::mixture_link` — the match is over link variants all defined in this
311/// module, so the predicate is self-contained. The canonical jet evaluation
312/// still lives in `solver::mixture_link`; this is purely the classifier.
313#[inline]
314fn inverse_link_has_fisher_weight_jet(link: &InverseLink) -> bool {
315 matches!(
316 link,
317 InverseLink::Standard(StandardLink::Logit | StandardLink::Probit | StandardLink::CLogLog,)
318 | InverseLink::LatentCLogLog(_)
319 | InverseLink::Sas(_)
320 | InverseLink::BetaLogistic(_)
321 | InverseLink::Mixture(_)
322 )
323}
324
325/// Parameterized inverse-link selector used where mu/derivatives are evaluated.
326#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
327pub enum InverseLink {
328 Standard(StandardLink),
329 LatentCLogLog(LatentCLogLogState),
330 Sas(SasLinkState),
331 BetaLogistic(SasLinkState),
332 Mixture(MixtureLinkState),
333}
334
335impl InverseLink {
336 #[inline]
337 pub const fn link_function(&self) -> LinkFunction {
338 match self {
339 Self::Standard(link) => link.as_link_function(),
340 Self::LatentCLogLog(_) => LinkFunction::CLogLog,
341 Self::Sas(_) => LinkFunction::Sas,
342 Self::BetaLogistic(_) => LinkFunction::BetaLogistic,
343 Self::Mixture(_) => LinkFunction::Logit,
344 }
345 }
346
347 #[inline]
348 pub const fn mixture_state(&self) -> Option<&MixtureLinkState> {
349 match self {
350 Self::Mixture(state) => Some(state),
351 _ => None,
352 }
353 }
354
355 #[inline]
356 pub const fn sas_state(&self) -> Option<&SasLinkState> {
357 match self {
358 Self::Sas(state) | Self::BetaLogistic(state) => Some(state),
359 _ => None,
360 }
361 }
362
363 #[inline]
364 pub const fn latent_cloglog_state(&self) -> Option<&LatentCLogLogState> {
365 match self {
366 Self::LatentCLogLog(state) => Some(state),
367 _ => None,
368 }
369 }
370}
371
372/// Fixed prior family for smoothing parameters in joint HMC refinement.
373#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
374pub enum RhoPrior {
375 Flat,
376 Normal {
377 mean: f64,
378 sd: f64,
379 },
380 /// Gamma(shape, rate) conjugate hyperprior on the precision lambda = exp(rho).
381 ///
382 /// The deterministic REML/LAML objective uses the MAP-in-lambda convention
383 /// and is minimized, so this contributes `rate * exp(rho) - (shape - 1) * rho`
384 /// up to an additive constant. Samplers over rho include the +rho Jacobian
385 /// from lambda = exp(rho), so their log-density contribution is
386 /// `shape * rho - rate * exp(rho)`. For a block with effective dimension n_p
387 /// and centered quadratic
388 /// `(beta - mu)'S_p(beta - mu)`, the conditional posterior is
389 /// `Gamma(shape + n_p/2, rate + quadratic/2)` and the closed-form MAP
390 /// precision is `(shape + n_p/2 - 1) / (rate + quadratic/2)`.
391 /// `Gamma(1, 0)` is the explicit flat/default case and reproduces the
392 /// current MacKay/Tipping fixed point.
393 GammaPrecision {
394 shape: f64,
395 rate: f64,
396 },
397 /// Penalized-complexity (PC) prior on the smoothing parameter
398 /// (Simpson, Rue, Riebler, Martins, Sørbye, *Statistical Science* 2017).
399 ///
400 /// A PC prior fixes a *base* model (here the infinitely-smooth limit, where
401 /// the penalized component collapses to its null space) and puts an
402 /// exponential prior on the distance away from it. For a Gaussian smooth
403 /// with precision `λ = exp(ρ)` the relevant distance is the marginal
404 /// standard-deviation scale `d = λ^{-1/2} = exp(-ρ/2)`, and a constant-rate
405 /// penalization `p(d) = θ exp(-θ d)` induces the closed-form log-prior
406 ///
407 /// ```text
408 /// log p(ρ) = ln(θ/2) − ρ/2 − θ exp(−ρ/2).
409 /// ```
410 ///
411 /// The rate `θ` is calibrated by the single interpretable tail statement
412 /// `P(d > upper) = tail_prob`, i.e. `θ = −ln(tail_prob) / upper`. The prior
413 /// is reparameterization-invariant and shrinks toward the simpler model
414 /// (an exponential wall against under-smoothing, only a gentle linear pull
415 /// toward over-smoothing), which is exactly the Occam behaviour wanted for
416 /// high-variance flexible components. The REML/LAML objective is minimized,
417 /// so this contributes `ρ/2 + θ exp(−ρ/2)` (up to an additive constant) to
418 /// the cost, with gradient `1/2 − (θ/2) exp(−ρ/2)` and (always positive)
419 /// curvature `(θ/4) exp(−ρ/2)`.
420 PenalizedComplexity {
421 /// Upper bound `U` on the distance scale `d = exp(-ρ/2)` (the marginal
422 /// SD scale of the penalized component) in the tail statement
423 /// `P(d > U) = tail_prob`. Must be finite and strictly positive.
424 upper: f64,
425 /// Tail probability `α` in `P(d > U) = α`. Must satisfy `0 < α < 1`.
426 tail_prob: f64,
427 },
428 /// Coordinate-specific priors for models whose smoothing parameters do
429 /// not share one prior family, such as nested coefficient groups.
430 Independent(Vec<RhoPrior>),
431}
432
433impl Default for RhoPrior {
434 fn default() -> Self {
435 Self::Normal { mean: 0.0, sd: 3.0 }
436 }
437}
438
439// ---------------------------------------------------------------------------
440// Unified likelihood specification
441// ---------------------------------------------------------------------------
442//
443// `LikelihoodSpec { response: ResponseFamily, link: InverseLink }` is the
444// canonical likelihood selector. `ResponseFamily` is a pure response-
445// distribution selector that carries the per-family scalars
446// (`Tweedie { p }`, `NegativeBinomial { theta }`, `Beta { phi }`); `InverseLink`
447// is the parameterized inverse-link selector. Splitting (response, link)
448// removes the drift bug that the former flat likelihood enum allowed
449// when its variant disagreed with a separately-stored `InverseLink`.
450
451/// Pure response distribution selector — no link information.
452#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
453pub enum ResponseFamily {
454 Gaussian,
455 Binomial,
456 Poisson,
457 Tweedie {
458 p: f64,
459 },
460 NegativeBinomial {
461 theta: f64,
462 /// `true` when `theta` was supplied by the user as a held-fixed value
463 /// (`--negative-binomial-theta`, issue #983): the fit must use exactly
464 /// this overdispersion — `Var(y) = μ + μ²/θ`, IRLS weight
465 /// `W = μθ/(θ+μ)`, coefficients/covariance/SEs all reflect it — and
466 /// the inner solver must never overwrite it. `false` means `theta` is
467 /// the running seed/estimate refined from the data each inner solve
468 /// (the #802 default). Carried on the family variant — the canonical
469 /// `theta` store — so the estimated-vs-fixed contract can never desync
470 /// from the value itself; `default_scale_metadata` derives the
471 /// matching scale variant.
472 theta_fixed: bool,
473 },
474 Beta {
475 phi: f64,
476 },
477 Gamma,
478 RoystonParmar,
479}
480
481impl ResponseFamily {
482 #[inline]
483 pub const fn name(&self) -> &'static str {
484 match self {
485 Self::Gaussian => "gaussian",
486 Self::Binomial => "binomial",
487 Self::Poisson => "poisson",
488 Self::Tweedie { .. } => "tweedie",
489 Self::NegativeBinomial { .. } => "negative-binomial",
490 Self::Beta { .. } => "beta",
491 Self::Gamma => "gamma",
492 Self::RoystonParmar => "royston-parmar",
493 }
494 }
495
496 /// Closed-interval bounds for the mean (response-scale) of this family.
497 ///
498 /// Used by predict-side CI clamps that need to keep transformed bounds
499 /// within the support of the response. Beta uses strict-open `(1e-10, 1 − 1e-10)`
500 /// to avoid logit singularities; Binomial / Royston-Parmar use the closed
501 /// `[0, 1]` since they are evaluated post-transformation. Unbounded
502 /// (continuous-real or non-negative-real) families return `None` — the
503 /// caller should not clamp.
504 #[inline]
505 pub fn mean_clamp_bounds(&self) -> Option<(f64, f64)> {
506 match self {
507 Self::Binomial | Self::RoystonParmar => Some((0.0, 1.0)),
508 Self::Beta { .. } => Some((1e-10, 1.0 - 1e-10)),
509 Self::Gaussian
510 | Self::Poisson
511 | Self::Tweedie { .. }
512 | Self::NegativeBinomial { .. }
513 | Self::Gamma => None,
514 }
515 }
516
517 /// Closed numeric bounds of the **response support** — the closure of the
518 /// set of values a single observation `Y` can take — used to clamp the
519 /// *observation (prediction) interval* so a predictive band never reports
520 /// values the response can never attain.
521 ///
522 /// This is deliberately distinct from [`Self::mean_clamp_bounds`], which
523 /// governs the *mean* (confidence) interval. `mean_clamp_bounds` returns
524 /// `None` for the non-negative-real families (Poisson / Tweedie /
525 /// NegativeBinomial / Gamma) because their default mean interval is built
526 /// by transforming the η endpoints through a positive inverse link, which
527 /// cannot escape the support. The observation interval, by contrast, is the
528 /// symmetric response-scale band `μ ± z·σ_pred`; for a small fitted mean its
529 /// lower endpoint crosses below the support floor (e.g. a Poisson count band
530 /// going negative), so it must be floored at the response support here.
531 ///
532 /// The lower edge is the infimum of the support (`0` for every non-negative
533 /// family, including the open-at-zero Gamma, whose predictive lower bound is
534 /// reported at the boundary `0`). The upper edge is `+∞` where the response
535 /// is unbounded above, which leaves the upper band untouched, or `1` for the
536 /// `[0, 1]`-valued families. `None` means the response is supported on the
537 /// whole real line (Gaussian) or has its support enforced downstream
538 /// (Royston–Parmar), and the predictive band is passed through unclamped.
539 ///
540 /// The match arms mirror [`Self::response_support_contains`]: a new family
541 /// must update both together so the support a value is validated against and
542 /// the support a predictive band is clamped to stay consistent.
543 #[inline]
544 pub fn response_support_bounds(&self) -> Option<(f64, f64)> {
545 match self {
546 Self::Gamma | Self::Poisson | Self::NegativeBinomial { .. } | Self::Tweedie { .. } => {
547 Some((0.0, f64::INFINITY))
548 }
549 Self::Beta { .. } | Self::Binomial => Some((0.0, 1.0)),
550 Self::Gaussian | Self::RoystonParmar => None,
551 }
552 }
553
554 /// Per-family textual description of the response-support requirement.
555 /// `None` means the family is supported on the entire real line at the
556 /// validation layer (Gaussian) or has its support enforced by a downstream
557 /// pathway (RoystonParmar via the survival pipeline).
558 ///
559 /// `Binomial` is the scalar Bernoulli-logit family: with no per-row trial
560 /// count `mᵢ`, the log-likelihood is `ℓ(η) = y·η − log(1 + eη)`, which is
561 /// unbounded above for `y ∉ {0, 1}` (as `η → ∞`, `ℓ ~ (y − 1)·η`), and the
562 /// binomial deviance term `(1 − y)·log((1 − y)/(1 − μ))` leaves its domain
563 /// for `y > 1`. The family is therefore only well-posed for `y ∈ {0, 1}`,
564 /// which the support check enforces up front rather than deferring to the
565 /// downstream binarity heuristic.
566 #[inline]
567 pub fn response_support_requirement(&self) -> Option<&'static str> {
568 match self {
569 Self::Gamma => Some("strictly positive response values (y > 0)"),
570 Self::Poisson | Self::NegativeBinomial { .. } | Self::Tweedie { .. } => {
571 Some("non-negative response values (y ≥ 0)")
572 }
573 Self::Beta { .. } => Some("response values strictly in the open interval (0, 1)"),
574 Self::Binomial => Some("binary response values (y ∈ {0, 1})"),
575 Self::Gaussian | Self::RoystonParmar => None,
576 }
577 }
578
579 /// Predicate that returns `true` iff `yi` lies in this family's response
580 /// support. Only meaningful for families with a non-trivial domain
581 /// constraint at the validation layer; `validate_response_support` calls
582 /// this only after `response_support_requirement` returns `Some`, so the
583 /// "unconstrained" families (Gaussian / RoystonParmar) never hit this code
584 /// path.
585 ///
586 /// `Binomial` accepts only an (exactly) binary value: `y` must equal `0` or
587 /// `1` to within `BINOMIAL_BINARY_TOL` — the same `{0, 1}` test used by the
588 /// auto-inference and degeneracy paths — because the scalar Bernoulli-logit
589 /// likelihood is unbounded for any other value (see
590 /// `response_support_requirement`).
591 #[inline]
592 fn response_support_contains(&self, yi: f64) -> bool {
593 match self {
594 Self::Gamma => yi.is_finite() && yi > 0.0,
595 Self::Poisson | Self::NegativeBinomial { .. } | Self::Tweedie { .. } => {
596 yi.is_finite() && yi >= 0.0
597 }
598 Self::Beta { .. } => yi.is_finite() && yi > 0.0 && yi < 1.0,
599 Self::Binomial => {
600 yi.is_finite()
601 && ((yi - 0.0).abs() < BINOMIAL_BINARY_TOL
602 || (yi - 1.0).abs() < BINOMIAL_BINARY_TOL)
603 }
604 Self::Gaussian | Self::RoystonParmar => true,
605 }
606 }
607
608 /// Human-readable family label used in domain-violation error messages
609 /// (capitalised to match user-facing prose, distinct from `name()` which
610 /// returns the lowercase canonical identifier).
611 #[inline]
612 fn response_support_label(&self) -> &'static str {
613 match self {
614 Self::Gaussian => "Gaussian",
615 Self::Binomial => "Binomial",
616 Self::Poisson => "Poisson",
617 Self::Tweedie { .. } => "Tweedie",
618 Self::NegativeBinomial { .. } => "Negative-Binomial",
619 Self::Beta { .. } => "Beta",
620 Self::Gamma => "Gamma",
621 Self::RoystonParmar => "Royston-Parmar",
622 }
623 }
624
625 /// Validate that every element of `y` lies in this family's response
626 /// support. The check is the upfront, fit-blocking enforcement of the
627 /// family's distributional support — e.g. Gamma rejects `y ≤ 0` because
628 /// the log-likelihood contains `log(y)`, Poisson rejects `y < 0` because
629 /// the log-mass contains `log(y!)`.
630 ///
631 /// Returns `Ok(())` for families whose support is the entire real line at
632 /// this layer (Gaussian) or whose support is enforced by a downstream
633 /// pathway (RoystonParmar via the survival pipeline). `Binomial` is
634 /// enforced here: only `y ∈ {0, 1}` keeps the scalar Bernoulli-logit
635 /// likelihood bounded.
636 ///
637 /// Up to `ResponseSupportViolation::MAX_REPORTED` offending row indices
638 /// are returned in the violation so the message stays bounded on large
639 /// datasets while still identifying offending rows.
640 pub fn validate_response_support(
641 &self,
642 y: ArrayView1<'_, f64>,
643 ) -> Result<(), ResponseSupportViolation> {
644 let requirement = match self.response_support_requirement() {
645 Some(r) => r,
646 None => return Ok(()),
647 };
648 let mut offending: Vec<(usize, f64)> = Vec::new();
649 let mut total_violations: usize = 0;
650 for (i, &yi) in y.iter().enumerate() {
651 if !self.response_support_contains(yi) {
652 total_violations += 1;
653 if offending.len() < ResponseSupportViolation::MAX_REPORTED {
654 offending.push((i, yi));
655 }
656 }
657 }
658 if total_violations == 0 {
659 Ok(())
660 } else {
661 Err(ResponseSupportViolation {
662 family_label: self.response_support_label(),
663 requirement,
664 offending,
665 total_violations,
666 })
667 }
668 }
669
670 /// Detect a *degenerate* response: one whose value distribution makes the
671 /// family's REML log-likelihood non-finite even though every individual
672 /// `y_i` lies inside the family's distributional support.
673 ///
674 /// Symmetric counterpart to [`Self::validate_response_support`]: support
675 /// rejects out-of-domain *values* (e.g. a negative Poisson count); this
676 /// rejects *distributions* that send the saturated MLE to a boundary at
677 /// which the score diverges. Each family answers the question for itself
678 /// — adding a new family does not require touching workflow.rs.
679 ///
680 /// Concretely:
681 /// * `Binomial` — refuses an all-zero or all-one response: the saturated
682 /// logit is ±∞ and the REML score is +∞ (issue #331).
683 /// * Every other family currently returns `Ok(())` at this layer — the
684 /// support check already guarantees enough variation to make the
685 /// log-likelihood finite.
686 pub fn validate_response_degeneracy(
687 &self,
688 y: ArrayView1<'_, f64>,
689 ) -> Result<(), ResponseDegeneracy> {
690 match self {
691 Self::Binomial => {
692 let mut saw_zero = false;
693 let mut saw_one = false;
694 for &yi in y.iter() {
695 if (yi - 0.0).abs() < BINOMIAL_BINARY_TOL {
696 saw_zero = true;
697 } else if (yi - 1.0).abs() < BINOMIAL_BINARY_TOL {
698 saw_one = true;
699 }
700 if saw_zero && saw_one {
701 return Ok(());
702 }
703 }
704 let kind = if saw_one {
705 ResponseDegeneracyKind::BinomialAllOnes
706 } else if saw_zero {
707 ResponseDegeneracyKind::BinomialAllZeros
708 } else {
709 // Reachable only for an empty response: the support check
710 // (`validate_response_support`) has already rejected any
711 // non-binary value, so every present `yᵢ` is exactly 0 or 1
712 // and at least one of `saw_zero`/`saw_one` is set whenever
713 // `y` is non-empty. An empty response carries no
714 // saturated-boundary degeneracy, so accept it here.
715 return Ok(());
716 };
717 Err(ResponseDegeneracy {
718 family_label: self.response_support_label(),
719 kind,
720 })
721 }
722 Self::Gaussian => {
723 // A Gaussian fit's marginal REML log-likelihood carries a
724 // `−n/2·log σ²` term; for an effectively-constant response the
725 // ML scale `σ → 0` drives it to `+∞`, so the outer objective
726 // rejects every seed with "reml_score must be finite, got inf"
727 // (#332). Reject pre-fit when the two-pass, mean-centred sample
728 // sd is at or below `GAUSSIAN_MIN_SAMPLE_SD`. Fewer than two
729 // observations carries no estimable scale degeneracy (the
730 // sample-size gate handles too-small data), and any non-finite
731 // value is left to the dedicated finiteness checks rather than
732 // poisoning the sd, so it is skipped here.
733 let mut count = 0usize;
734 let mut mean = 0.0f64;
735 for &yi in y.iter() {
736 if !yi.is_finite() {
737 return Ok(());
738 }
739 count += 1;
740 mean += yi;
741 }
742 if count < 2 {
743 return Ok(());
744 }
745 mean /= count as f64;
746 let mut sumsq = 0.0f64;
747 for &yi in y.iter() {
748 let d = yi - mean;
749 sumsq += d * d;
750 }
751 let sample_sd = (sumsq / (count as f64 - 1.0)).sqrt();
752 if sample_sd <= GAUSSIAN_MIN_SAMPLE_SD {
753 return Err(ResponseDegeneracy {
754 family_label: self.response_support_label(),
755 kind: ResponseDegeneracyKind::GaussianNearConstant {
756 sample_sd,
757 min_sd: GAUSSIAN_MIN_SAMPLE_SD,
758 },
759 });
760 }
761 Ok(())
762 }
763 Self::Poisson
764 | Self::Tweedie { .. }
765 | Self::NegativeBinomial { .. }
766 | Self::Beta { .. }
767 | Self::Gamma
768 | Self::RoystonParmar => Ok(()),
769 }
770 }
771
772 /// Auto-infer a likelihood family when the user did not specify one.
773 ///
774 /// Policy:
775 /// * A string-valued (`Categorical`) response column is refused —
776 /// numeric-encoded level indices (e.g. `"yes"`/`"no"` → `0.0`/`1.0`)
777 /// would otherwise be silently interpreted as a binary outcome,
778 /// producing a probability model the user never asked for.
779 /// * A strictly-binary numeric response (`Binary` kind, or `Numeric`
780 /// with only `{0, 1}` values) maps to `Binomial`.
781 /// * A non-negative integer-valued count response (every value finite,
782 /// `>= 0`, and within [`COUNT_INTEGER_TOL`] of an integer) that reaches
783 /// beyond the binary `{0, 1}` window (i.e. carries at least one value
784 /// `>= 2`) maps to `Poisson` (log link). This is the "magic-by-default"
785 /// count detection: mgcv/statsmodels users expect `0,1,2,3,...` to fit a
786 /// Poisson GLM, not an identity-link Gaussian.
787 /// * Anything else (any fractional or negative value) maps to `Gaussian`.
788 ///
789 /// The fallback to `is_binary_response` inside the `Numeric` arm is what
790 /// historically lived directly inside `resolve_family`; centralising the
791 /// policy here means every entry point (formula API, CLI, future bindings)
792 /// gets the same default-inference behaviour.
793 pub fn infer_from_response(
794 y: ArrayView1<'_, f64>,
795 y_kind: ResponseColumnKind,
796 ) -> Result<Self, ResponseInferenceRefusal> {
797 match y_kind {
798 ResponseColumnKind::Categorical { levels } => Err(ResponseInferenceRefusal {
799 reason: ResponseInferenceRefusalReason::NonNumericResponse,
800 levels,
801 }),
802 ResponseColumnKind::Binary => Ok(Self::Binomial),
803 ResponseColumnKind::Numeric => {
804 let binary = !y.is_empty()
805 && y.iter().all(|v| {
806 v.is_finite()
807 && ((*v - 0.0).abs() < BINOMIAL_BINARY_TOL
808 || (*v - 1.0).abs() < BINOMIAL_BINARY_TOL)
809 });
810 if binary {
811 return Ok(Self::Binomial);
812 }
813 // Count signature: every value finite, non-negative, and an
814 // integer within `COUNT_INTEGER_TOL`, with at least one value
815 // `>= 2` so it is not the (already-handled) binary case and not
816 // a degenerate all-zero column. A single fractional or negative
817 // value disqualifies the whole response, keeping continuous and
818 // signed data on the conservative Gaussian default.
819 let count = !y.is_empty()
820 && y.iter().all(|v| {
821 v.is_finite() && *v >= 0.0 && (*v - v.round()).abs() <= COUNT_INTEGER_TOL
822 })
823 && y.iter().any(|v| *v >= 2.0 - COUNT_INTEGER_TOL);
824 if count {
825 Ok(Self::Poisson)
826 } else {
827 Ok(Self::Gaussian)
828 }
829 }
830 }
831 }
832}
833
834/// Domain-violation detail produced by [`ResponseFamily::validate_response_support`].
835///
836/// Owns its own `Display` impl so call sites in the workflow, the CLI, and the
837/// external-design GLM path produce identical user-facing prose. The
838/// `total_violations` counter is kept distinct from `offending.len()` so the
839/// message can honestly say `(N total)` even when only the first
840/// `MAX_REPORTED` indices are surfaced.
841#[derive(Debug, Clone)]
842pub struct ResponseSupportViolation {
843 pub family_label: &'static str,
844 pub requirement: &'static str,
845 pub offending: Vec<(usize, f64)>,
846 pub total_violations: usize,
847}
848
849impl ResponseSupportViolation {
850 /// Maximum number of offending row indices reported in the error message.
851 /// Keeps the message bounded on large-scale data while still pointing
852 /// the user at concrete bad rows to inspect.
853 pub const MAX_REPORTED: usize = 5;
854
855 /// Format the violation against a specific response column name. The
856 /// column name is supplied by the caller because [`ResponseFamily`] does
857 /// not know which column the user pointed at.
858 pub fn message_for(&self, response_name: &str) -> String {
859 let shown = self
860 .offending
861 .iter()
862 .map(|(i, v)| format!("y[{i}]={v}"))
863 .collect::<Vec<_>>()
864 .join(", ");
865 let more = if self.total_violations > self.offending.len() {
866 format!(", ... ({} total)", self.total_violations)
867 } else {
868 String::new()
869 };
870 format!(
871 "{family} family requires {req}; response column '{name}' violates this constraint at row(s) [{shown}{more}]",
872 family = self.family_label,
873 req = self.requirement,
874 name = response_name,
875 )
876 }
877}
878
879impl std::fmt::Display for ResponseSupportViolation {
880 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
881 f.write_str(&self.message_for("y"))
882 }
883}
884
885impl std::error::Error for ResponseSupportViolation {}
886
887/// Absolute tolerance for the exact-`{0, 1}` test that defines the scalar
888/// Bernoulli (`Binomial`) response support.
889///
890/// The scalar `Binomial` family carries no per-row trial count, so its
891/// log-likelihood is the Bernoulli/soft-label cross-entropy
892/// `ℓ(η) = y·η − log(1 + eη)`, which is unbounded above for `y ∉ {0, 1}`.
893/// Both the auto-inference (`infer_from_response`) and degeneracy
894/// (`validate_response_degeneracy`) paths classify a value as binary by the
895/// same `1e-12` window; the support check shares this single threshold so the
896/// three layers agree on exactly which responses are admissible.
897pub const BINOMIAL_BINARY_TOL: f64 = 1.0e-12;
898
899/// Minimum admissible sample standard deviation for a `Gaussian` response.
900///
901/// A response whose two-pass, mean-centred sample sd is at or below this
902/// threshold is *effectively constant* in `f64` arithmetic: the marginal REML
903/// log-likelihood carries a `−n/2·log σ²` term that diverges to `+∞` as the
904/// fitted scale `σ → 0`, so the outer objective rejects every seed with
905/// `reml_score must be finite, got inf` (#332). The bound is chosen well below
906/// any well-conditioned scientific signal (genuine data has sd many orders of
907/// magnitude larger) yet above the f64 round-off floor, so it never trips a
908/// real fit while catching responses that carry no signal (e.g. a column read
909/// in the wrong scale, or a constant accidentally fed as the response).
910pub const GAUSSIAN_MIN_SAMPLE_SD: f64 = 1.0e-10;
911
912/// Round tolerance for recognising an integer-valued (count) response.
913///
914/// `infer_from_response` classifies a numeric response as a Poisson count when
915/// every value is finite, non-negative, and within this window of its nearest
916/// non-negative integer. The threshold is looser than [`BINOMIAL_BINARY_TOL`]
917/// because count columns frequently arrive as `f64` round-trips of integers
918/// (CSV parse, integer→double promotion) that accumulate ULP-scale error well
919/// above `1e-12`; `1e-9` admits those without ever matching genuinely
920/// continuous data, whose fractional parts are O(1).
921pub const COUNT_INTEGER_TOL: f64 = 1.0e-9;
922
923/// Classifier for a [`ResponseDegeneracy`]. Each variant carries the family-
924/// specific evidence the caller needs to format a useful message without
925/// having to re-derive the diagnostic.
926#[derive(Debug, Clone)]
927pub enum ResponseDegeneracyKind {
928 /// Bernoulli / Binomial response with every observed value equal to 0.
929 BinomialAllZeros,
930 /// Bernoulli / Binomial response with every observed value equal to 1.
931 BinomialAllOnes,
932 /// Gaussian response that is effectively constant in `f64` arithmetic
933 /// (sample standard deviation at or below [`GAUSSIAN_MIN_SAMPLE_SD`]). The
934 /// marginal REML log-likelihood `−n/2·log σ²` diverges to `+∞` as the
935 /// fitted scale `σ → 0`, so every outer evaluation rejects with a
936 /// non-finite score. Carries the observed `sample_sd` and the `min_sd`
937 /// threshold so the message can quote both verbatim (#332).
938 GaussianNearConstant {
939 /// The two-pass, mean-centred sample standard deviation of the response.
940 sample_sd: f64,
941 /// The rejection threshold ([`GAUSSIAN_MIN_SAMPLE_SD`]).
942 min_sd: f64,
943 },
944}
945
946/// Degenerate-response detail produced by
947/// [`ResponseFamily::validate_response_degeneracy`].
948///
949/// Mirrors [`ResponseSupportViolation`]: it owns its own `Display` and
950/// `message_for(column_name)` so call sites in the workflow, the CLI, and
951/// any future binding produce identical user-facing prose without coupling
952/// each one to the family-internal classifier.
953#[derive(Debug, Clone)]
954pub struct ResponseDegeneracy {
955 pub family_label: &'static str,
956 pub kind: ResponseDegeneracyKind,
957}
958
959impl ResponseDegeneracy {
960 /// Format the degeneracy against a specific response column name. The
961 /// column name is supplied by the caller because [`ResponseFamily`] does
962 /// not know which column the user pointed at.
963 pub fn message_for(&self, response_name: &str) -> String {
964 match self.kind {
965 ResponseDegeneracyKind::BinomialAllZeros => format!(
966 "{family} response '{name}' is degenerate: all values are 0 (no events). \
967 The maximum-likelihood logit is −∞ at this boundary, so the REML score \
968 is not finite. Fix: ensure the response contains at least one 0 and \
969 at least one 1 (e.g. drop the offending subgroup, or refit on a pooled \
970 sample that includes both classes).",
971 family = self.family_label,
972 name = response_name,
973 ),
974 ResponseDegeneracyKind::BinomialAllOnes => format!(
975 "{family} response '{name}' is degenerate: all values are 1 (no non-events). \
976 The maximum-likelihood logit is +∞ at this boundary, so the REML score \
977 is not finite. Fix: ensure the response contains at least one 0 and \
978 at least one 1 (e.g. drop the offending subgroup, or refit on a pooled \
979 sample that includes both classes).",
980 family = self.family_label,
981 name = response_name,
982 ),
983 ResponseDegeneracyKind::GaussianNearConstant { sample_sd, min_sd } => format!(
984 "{family} response '{name}' is effectively constant (sample sd ~ {sample_sd:.3e} \
985 <= {min_sd:.0e}); the marginal REML log-likelihood −n/2·log σ² diverges to \
986 +∞ as σ → 0. Fix: check the response column units (is it being read in the \
987 right scale?), centre/rescale the response, or drop the column if it carries \
988 no signal.",
989 family = self.family_label,
990 name = response_name,
991 ),
992 }
993 }
994}
995
996impl std::fmt::Display for ResponseDegeneracy {
997 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
998 f.write_str(&self.message_for("y"))
999 }
1000}
1001
1002impl std::error::Error for ResponseDegeneracy {}
1003
1004/// Caller-supplied description of the response column's *source* kind.
1005///
1006/// `Categorical { levels }` flags a column that arrived as non-numeric strings
1007/// (the ingest layer encoded its levels to `0.0, 1.0, ...` indices) — the
1008/// `levels` list is preserved so the auto-inference refusal can echo them
1009/// back to the user verbatim. `Binary` is the ingest-layer signal that a
1010/// numeric column already contains only `{0, 1}` (used to short-circuit the
1011/// scan inside [`ResponseFamily::infer_from_response`]). `Numeric` is the
1012/// generic continuous case.
1013#[derive(Debug, Clone)]
1014pub enum ResponseColumnKind {
1015 Numeric,
1016 Binary,
1017 Categorical { levels: Vec<String> },
1018}
1019
1020/// Reason [`ResponseFamily::infer_from_response`] refused to pick a default
1021/// family. Kept as an enum so future policy extensions (e.g. "refuse on
1022/// constant response" — currently a separate CLI-side check) can be added
1023/// without breaking the call site's match arms.
1024#[derive(Debug, Clone)]
1025pub enum ResponseInferenceRefusalReason {
1026 NonNumericResponse,
1027}
1028
1029/// Auto-inference refusal carrying the levels seen in the source column so
1030/// the workflow error can echo them in its message.
1031#[derive(Debug, Clone)]
1032pub struct ResponseInferenceRefusal {
1033 pub reason: ResponseInferenceRefusalReason,
1034 pub levels: Vec<String>,
1035}
1036
1037impl ResponseInferenceRefusal {
1038 /// Format the refusal against a specific response column name.
1039 pub fn message_for(&self, response_name: &str) -> String {
1040 match self.reason {
1041 ResponseInferenceRefusalReason::NonNumericResponse => {
1042 let n = self.levels.len().min(5);
1043 let head = self
1044 .levels
1045 .iter()
1046 .take(n)
1047 .map(|s| format!("'{s}'"))
1048 .collect::<Vec<_>>()
1049 .join(", ");
1050 let preview = if self.levels.len() > n {
1051 format!("[{head}, ...]")
1052 } else {
1053 format!("[{head}]")
1054 };
1055 format!(
1056 "response column '{name}' contains non-numeric values {preview}. \
1057 Did you mean to use family='binomial' for a binary outcome, \
1058 or does '{name}' contain categorical labels that should be encoded first?",
1059 name = response_name,
1060 preview = preview,
1061 )
1062 }
1063 }
1064 }
1065}
1066
1067impl std::fmt::Display for ResponseInferenceRefusal {
1068 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1069 f.write_str(&self.message_for("y"))
1070 }
1071}
1072
1073impl std::error::Error for ResponseInferenceRefusal {}
1074
1075/// Unified likelihood specification: response distribution + parameterized link.
1076///
1077/// `ResponseFamily` carries the per-family scalars (Tweedie p, NegBin theta,
1078/// Beta phi); `InverseLink` carries the parameterized link state. Together
1079/// they replace the former flat likelihood enum.
1080///
1081/// Only the legal `(response, link)` cells enumerated by [`LikelihoodSpec::kind`]
1082/// are representable through the public surface: [`LikelihoodSpec::try_new`]
1083/// validates the legal matrix on construction, and deserialization routes
1084/// through [`LikelihoodSpecWire`] (`#[serde(try_from / into)]`) so saved bytes
1085/// cannot resurrect an illegal cell. The on-wire shape is byte-identical to the
1086/// historical `{ response, link }` struct, so legal saved models load unchanged.
1087#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1088#[serde(try_from = "LikelihoodSpecWire", into = "LikelihoodSpecWire")]
1089pub struct LikelihoodSpec {
1090 pub response: ResponseFamily,
1091 pub link: InverseLink,
1092}
1093
1094/// Transparent serde shadow of [`LikelihoodSpec`] with the identical wire shape
1095/// (`response`, `link`). All (de)serialization of `LikelihoodSpec` routes
1096/// through this type so the legal-matrix check in
1097/// [`TryFrom<LikelihoodSpecWire>`] runs on every load, closing the
1098/// saved-bytes hole: an illegal `(response, link)` cell deserializes into a
1099/// serde error instead of a silently-masked spec.
1100#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1101pub struct LikelihoodSpecWire {
1102 pub response: ResponseFamily,
1103 pub link: InverseLink,
1104}
1105
1106impl From<LikelihoodSpec> for LikelihoodSpecWire {
1107 #[inline]
1108 fn from(spec: LikelihoodSpec) -> Self {
1109 Self {
1110 response: spec.response,
1111 link: spec.link,
1112 }
1113 }
1114}
1115
1116impl TryFrom<LikelihoodSpecWire> for LikelihoodSpec {
1117 type Error = IllegalLikelihoodCell;
1118
1119 #[inline]
1120 fn try_from(wire: LikelihoodSpecWire) -> Result<Self, Self::Error> {
1121 Self::try_new(wire.response, wire.link)
1122 }
1123}
1124
1125/// Error returned when an illegal `(ResponseFamily, InverseLink)` cell is
1126/// presented to [`LikelihoodSpec::try_new`] or surfaced during
1127/// deserialization. Only the cells enumerated by [`LikelihoodSpec::kind`] are
1128/// legal; every other product cell would silently mask a wrong response
1129/// transformation (e.g. `Poisson + Identity` predicting `μ = η`, which can go
1130/// negative).
1131#[derive(Debug, Clone, PartialEq)]
1132pub struct IllegalLikelihoodCell {
1133 pub response: &'static str,
1134 pub link: &'static str,
1135}
1136
1137impl std::fmt::Display for IllegalLikelihoodCell {
1138 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1139 write!(
1140 f,
1141 "illegal likelihood cell: response `{}` does not admit inverse link `{}`. \
1142 Each non-binomial family is pinned to one link (Gaussian/Royston-Parmar→identity, \
1143 Poisson/Gamma/Tweedie/Negative-Binomial→log, Beta→logit); the binomial family \
1144 admits logit/probit/cloglog and the latent-cloglog/SAS/beta-logistic/blended \
1145 links, but not identity/log.",
1146 self.response, self.link
1147 )
1148 }
1149}
1150
1151impl std::error::Error for IllegalLikelihoodCell {}
1152
1153/// Legal-only enumeration of the `(ResponseFamily, InverseLink)` cells the
1154/// engine recognises. `LikelihoodSpec` is the product type with ~40 nominal
1155/// cells (8 response variants × 5 inverse-link variants), but only the cells
1156/// listed here are honoured by the family math; the rest are silently masked
1157/// by fallback arms. `FamilySpecKind` is the canonical projection used by
1158/// naming, predicates, and dispatch.
1159#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1160pub enum FamilySpecKind {
1161 GaussianIdentity,
1162 PoissonLog,
1163 GammaLog,
1164 TweedieLog { p: f64 },
1165 NegativeBinomialLog { theta: f64 },
1166 BetaLogit { phi: f64 },
1167 RoystonParmar,
1168 BinomialLogit,
1169 BinomialProbit,
1170 BinomialCLogLog,
1171 BinomialLatentCLogLog(LatentCLogLogState),
1172 BinomialSas(SasLinkState),
1173 BinomialBetaLogistic(SasLinkState),
1174 BinomialMixture(MixtureLinkState),
1175}
1176
1177impl FamilySpecKind {
1178 /// Short identifier matching the legacy `LikelihoodSpec::name()` strings.
1179 #[inline]
1180 pub const fn name(&self) -> &'static str {
1181 match self {
1182 Self::GaussianIdentity => "gaussian",
1183 Self::PoissonLog => "poisson-log",
1184 Self::TweedieLog { .. } => "tweedie-log",
1185 Self::NegativeBinomialLog { .. } => "negative-binomial-log",
1186 Self::BetaLogit { .. } => "beta-regression-logit",
1187 Self::GammaLog => "gamma-log",
1188 Self::RoystonParmar => "royston-parmar",
1189 Self::BinomialLogit => "binomial-logit",
1190 Self::BinomialProbit => "binomial-probit",
1191 Self::BinomialCLogLog => "binomial-cloglog",
1192 Self::BinomialLatentCLogLog(_) => "latent-cloglog-binomial",
1193 Self::BinomialSas(_) => "binomial-sas",
1194 Self::BinomialBetaLogistic(_) => "binomial-beta-logistic",
1195 Self::BinomialMixture(_) => "binomial-blended-inverse-link",
1196 }
1197 }
1198
1199 /// Human-readable label matching the legacy `LikelihoodSpec::pretty_name()` strings.
1200 #[inline]
1201 pub const fn pretty_name(&self) -> &'static str {
1202 match self {
1203 Self::GaussianIdentity => "Gaussian Identity",
1204 Self::PoissonLog => "Poisson Log",
1205 Self::TweedieLog { .. } => "Tweedie Log",
1206 Self::NegativeBinomialLog { .. } => "Negative-Binomial Log",
1207 Self::BetaLogit { .. } => "Beta Regression Logit",
1208 Self::GammaLog => "Gamma Log",
1209 Self::RoystonParmar => "Royston Parmar",
1210 Self::BinomialLogit => "Binomial Logit",
1211 Self::BinomialProbit => "Binomial Probit",
1212 Self::BinomialCLogLog => "Binomial CLogLog",
1213 Self::BinomialLatentCLogLog(_) => "Latent CLogLog Binomial",
1214 Self::BinomialSas(_) => "Binomial SAS",
1215 Self::BinomialBetaLogistic(_) => "Binomial Beta-Logistic",
1216 Self::BinomialMixture(_) => "Binomial Blended Inverse-Link",
1217 }
1218 }
1219
1220 #[inline]
1221 pub const fn is_binomial(&self) -> bool {
1222 matches!(
1223 self,
1224 Self::BinomialLogit
1225 | Self::BinomialProbit
1226 | Self::BinomialCLogLog
1227 | Self::BinomialLatentCLogLog(_)
1228 | Self::BinomialSas(_)
1229 | Self::BinomialBetaLogistic(_)
1230 | Self::BinomialMixture(_)
1231 )
1232 }
1233
1234 #[inline]
1235 pub const fn is_gaussian_identity(&self) -> bool {
1236 matches!(self, Self::GaussianIdentity)
1237 }
1238
1239 #[inline]
1240 pub const fn is_royston_parmar(&self) -> bool {
1241 matches!(self, Self::RoystonParmar)
1242 }
1243
1244 #[inline]
1245 pub const fn is_latent_cloglog(&self) -> bool {
1246 matches!(self, Self::BinomialLatentCLogLog(_))
1247 }
1248
1249 #[inline]
1250 pub const fn is_binomial_mixture(&self) -> bool {
1251 matches!(self, Self::BinomialMixture(_))
1252 }
1253
1254 #[inline]
1255 pub const fn is_binomial_sas(&self) -> bool {
1256 matches!(self, Self::BinomialSas(_))
1257 }
1258
1259 #[inline]
1260 pub const fn is_binomial_beta_logistic(&self) -> bool {
1261 matches!(self, Self::BinomialBetaLogistic(_))
1262 }
1263
1264 /// Coarse kind-level Firth eligibility: every binomial inverse link this
1265 /// enum can represent (Logit/Probit/CLogLog and the stateful
1266 /// LatentCLogLog/SAS/Beta-Logistic/Mixture links) carries a Fisher-weight
1267 /// jet, so kind-level Firth support is exactly binomial membership.
1268 ///
1269 /// The authoritative, link-resolved gate is
1270 /// [`LikelihoodSpec::supports_firth`], which routes through
1271 /// `inverse_link_has_fisher_weight_jet`. Keep this in agreement with that
1272 /// predicate: a future binomial link without a Fisher-weight jet would make
1273 /// this approximation diverge and must be handled at both sites.
1274 #[inline]
1275 pub const fn supports_firth(&self) -> bool {
1276 self.is_binomial()
1277 }
1278}
1279
1280impl LikelihoodSpec {
1281 /// Unchecked constructor: assembles a `(response, link)` cell *without*
1282 /// validating the legal matrix. Reserved for the in-crate named const
1283 /// constructors below (`gaussian_identity`, `poisson_log`, `beta_logit`,
1284 /// the `binomial_*` family, …), every one of which builds a cell that is
1285 /// legal by construction. The public, fallible entry point for an arbitrary
1286 /// `(response, link)` pair is [`LikelihoodSpec::try_new`]; the serde path
1287 /// also validates via [`LikelihoodSpecWire`]. Do not expose illegal cells
1288 /// through this method.
1289 #[inline]
1290 pub const fn new(response: ResponseFamily, link: InverseLink) -> Self {
1291 Self { response, link }
1292 }
1293
1294 /// Returns `true` when the `(response, link)` pair is one of the legal cells
1295 /// the family math honours — exactly the cells enumerated by
1296 /// [`LikelihoodSpec::kind`] before any masking. Each non-binomial response
1297 /// is pinned to a single inverse link; the binomial family admits its full
1298 /// set of probability links but never the identity/log standard links.
1299 #[inline]
1300 pub fn is_legal_cell(response: &ResponseFamily, link: &InverseLink) -> bool {
1301 match response {
1302 // Pure-identity families.
1303 ResponseFamily::Gaussian | ResponseFamily::RoystonParmar => {
1304 matches!(link, InverseLink::Standard(StandardLink::Identity))
1305 }
1306 // Log-link families.
1307 ResponseFamily::Poisson
1308 | ResponseFamily::Gamma
1309 | ResponseFamily::Tweedie { .. }
1310 | ResponseFamily::NegativeBinomial { .. } => {
1311 matches!(link, InverseLink::Standard(StandardLink::Log))
1312 }
1313 // Logit-link family.
1314 ResponseFamily::Beta { .. } => {
1315 matches!(link, InverseLink::Standard(StandardLink::Logit))
1316 }
1317 // Binomial admits every probability link except the inert
1318 // identity/log standard links.
1319 ResponseFamily::Binomial => match link {
1320 InverseLink::Standard(
1321 StandardLink::Logit | StandardLink::Probit | StandardLink::CLogLog,
1322 ) => true,
1323 InverseLink::Standard(StandardLink::Identity | StandardLink::Log) => false,
1324 InverseLink::LatentCLogLog(_)
1325 | InverseLink::Sas(_)
1326 | InverseLink::BetaLogistic(_)
1327 | InverseLink::Mixture(_) => true,
1328 },
1329 }
1330 }
1331
1332 /// Fallible constructor over an arbitrary `(response, link)` pair. Validates
1333 /// the legal matrix ([`LikelihoodSpec::is_legal_cell`]) so that an illegal
1334 /// cell — one whose stored link would drive a wrong response transformation
1335 /// — is rejected instead of silently masked by [`LikelihoodSpec::kind`].
1336 #[inline]
1337 pub fn try_new(
1338 response: ResponseFamily,
1339 link: InverseLink,
1340 ) -> Result<Self, IllegalLikelihoodCell> {
1341 if Self::is_legal_cell(&response, &link) {
1342 Ok(Self::new(response, link))
1343 } else {
1344 Err(IllegalLikelihoodCell {
1345 response: response.name(),
1346 link: link.link_function().name(),
1347 })
1348 }
1349 }
1350
1351 #[inline]
1352 pub const fn gaussian_identity() -> Self {
1353 Self::new(
1354 ResponseFamily::Gaussian,
1355 InverseLink::Standard(StandardLink::Identity),
1356 )
1357 }
1358
1359 #[inline]
1360 pub const fn binomial_logit() -> Self {
1361 Self::new(
1362 ResponseFamily::Binomial,
1363 InverseLink::Standard(StandardLink::Logit),
1364 )
1365 }
1366
1367 #[inline]
1368 pub const fn binomial_probit() -> Self {
1369 Self::new(
1370 ResponseFamily::Binomial,
1371 InverseLink::Standard(StandardLink::Probit),
1372 )
1373 }
1374
1375 #[inline]
1376 pub const fn binomial_cloglog() -> Self {
1377 Self::new(
1378 ResponseFamily::Binomial,
1379 InverseLink::Standard(StandardLink::CLogLog),
1380 )
1381 }
1382
1383 #[inline]
1384 pub const fn binomial_latent_cloglog(state: LatentCLogLogState) -> Self {
1385 Self::new(ResponseFamily::Binomial, InverseLink::LatentCLogLog(state))
1386 }
1387
1388 #[inline]
1389 pub const fn binomial_sas(state: SasLinkState) -> Self {
1390 Self::new(ResponseFamily::Binomial, InverseLink::Sas(state))
1391 }
1392
1393 #[inline]
1394 pub const fn binomial_beta_logistic(state: SasLinkState) -> Self {
1395 Self::new(ResponseFamily::Binomial, InverseLink::BetaLogistic(state))
1396 }
1397
1398 #[inline]
1399 pub fn binomial_mixture(state: MixtureLinkState) -> Self {
1400 Self::new(ResponseFamily::Binomial, InverseLink::Mixture(state))
1401 }
1402
1403 #[inline]
1404 pub const fn poisson_log() -> Self {
1405 Self::new(
1406 ResponseFamily::Poisson,
1407 InverseLink::Standard(StandardLink::Log),
1408 )
1409 }
1410
1411 #[inline]
1412 pub const fn tweedie_log(p: f64) -> Self {
1413 Self::new(
1414 ResponseFamily::Tweedie { p },
1415 InverseLink::Standard(StandardLink::Log),
1416 )
1417 }
1418
1419 /// Estimated-theta NB spec: `theta` is the seed, refined by the inner
1420 /// solver (#802 default).
1421 #[inline]
1422 pub const fn negative_binomial_log(theta: f64) -> Self {
1423 Self::new(
1424 ResponseFamily::NegativeBinomial {
1425 theta,
1426 theta_fixed: false,
1427 },
1428 InverseLink::Standard(StandardLink::Log),
1429 )
1430 }
1431
1432 /// Fixed-theta NB spec: the fit holds `theta` at exactly this value
1433 /// (`--negative-binomial-theta`, issue #983).
1434 #[inline]
1435 pub const fn negative_binomial_log_fixed(theta: f64) -> Self {
1436 Self::new(
1437 ResponseFamily::NegativeBinomial {
1438 theta,
1439 theta_fixed: true,
1440 },
1441 InverseLink::Standard(StandardLink::Log),
1442 )
1443 }
1444
1445 #[inline]
1446 pub const fn beta_logit(phi: f64) -> Self {
1447 Self::new(
1448 ResponseFamily::Beta { phi },
1449 InverseLink::Standard(StandardLink::Logit),
1450 )
1451 }
1452
1453 #[inline]
1454 pub const fn gamma_log() -> Self {
1455 Self::new(
1456 ResponseFamily::Gamma,
1457 InverseLink::Standard(StandardLink::Log),
1458 )
1459 }
1460
1461 #[inline]
1462 pub const fn royston_parmar() -> Self {
1463 Self::new(
1464 ResponseFamily::RoystonParmar,
1465 InverseLink::Standard(StandardLink::Identity),
1466 )
1467 }
1468
1469 #[inline]
1470 pub const fn link_function(&self) -> LinkFunction {
1471 self.link.link_function()
1472 }
1473
1474 /// Once-and-for-all classification into the legal-only `FamilySpecKind`.
1475 ///
1476 /// `(ResponseFamily, InverseLink)` is a 40-cell product (8 response × 5
1477 /// inverse-link); only the cells listed here are legal. Construction
1478 /// ([`LikelihoodSpec::try_new`]) and deserialization (the
1479 /// [`LikelihoodSpecWire`] `try_from`) both enforce
1480 /// [`LikelihoodSpec::is_legal_cell`], so an illegal cell can never reach
1481 /// this method. Each link-pinned family therefore matches its *one* legal
1482 /// link explicitly; the remaining (now-unreachable) illegal combinations
1483 /// are `unreachable!()` so the historical silent masking — collapsing e.g.
1484 /// `Poisson + Identity` to `PoissonLog` while the transform predicted
1485 /// `μ = η` — can never silently happen again.
1486 pub fn kind(&self) -> FamilySpecKind {
1487 // `legal_cell_kind` returns `Some` for every legal cell and `None`
1488 // for the (by-construction-unreachable) illegal ones. Construction
1489 // (`try_new`) and deserialization (`LikelihoodSpecWire` try_from)
1490 // both enforce `is_legal_cell`, so the `None` branch can never fire
1491 // on a value that exists — `.expect` is the idiomatic loud-on-
1492 // impossible-state assertion (a banned `unreachable!`/`panic!` macro
1493 // would be the same panic with worse provenance). If it ever does
1494 // fire, the message names the offending cell so the silent-masking
1495 // regression this guards against (e.g. `Poisson + Identity`
1496 // collapsing to `PoissonLog`) stays impossible.
1497 self.legal_cell_kind().expect(
1498 "illegal likelihood cell reached kind(): construction (try_new) and \
1499 deserialization (LikelihoodSpecWire) guarantee legality",
1500 )
1501 }
1502
1503 fn legal_cell_kind(&self) -> Option<FamilySpecKind> {
1504 Some(match (&self.response, &self.link) {
1505 (ResponseFamily::Gaussian, InverseLink::Standard(StandardLink::Identity)) => {
1506 FamilySpecKind::GaussianIdentity
1507 }
1508 (ResponseFamily::RoystonParmar, InverseLink::Standard(StandardLink::Identity)) => {
1509 FamilySpecKind::RoystonParmar
1510 }
1511 (ResponseFamily::Poisson, InverseLink::Standard(StandardLink::Log)) => {
1512 FamilySpecKind::PoissonLog
1513 }
1514 (ResponseFamily::Gamma, InverseLink::Standard(StandardLink::Log)) => {
1515 FamilySpecKind::GammaLog
1516 }
1517 (ResponseFamily::Tweedie { p }, InverseLink::Standard(StandardLink::Log)) => {
1518 FamilySpecKind::TweedieLog { p: *p }
1519 }
1520 (
1521 ResponseFamily::NegativeBinomial { theta, .. },
1522 InverseLink::Standard(StandardLink::Log),
1523 ) => FamilySpecKind::NegativeBinomialLog { theta: *theta },
1524 (ResponseFamily::Beta { phi }, InverseLink::Standard(StandardLink::Logit)) => {
1525 FamilySpecKind::BetaLogit { phi: *phi }
1526 }
1527 (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Logit)) => {
1528 FamilySpecKind::BinomialLogit
1529 }
1530 (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Probit)) => {
1531 FamilySpecKind::BinomialProbit
1532 }
1533 (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::CLogLog)) => {
1534 FamilySpecKind::BinomialCLogLog
1535 }
1536 (ResponseFamily::Binomial, InverseLink::LatentCLogLog(state)) => {
1537 FamilySpecKind::BinomialLatentCLogLog(*state)
1538 }
1539 (ResponseFamily::Binomial, InverseLink::Sas(state)) => {
1540 FamilySpecKind::BinomialSas(*state)
1541 }
1542 (ResponseFamily::Binomial, InverseLink::BetaLogistic(state)) => {
1543 FamilySpecKind::BinomialBetaLogistic(*state)
1544 }
1545 (ResponseFamily::Binomial, InverseLink::Mixture(state)) => {
1546 FamilySpecKind::BinomialMixture(state.clone())
1547 }
1548 // Every remaining product cell is illegal. `try_new` /
1549 // `LikelihoodSpecWire::try_from` reject these, so construction and
1550 // deserialization guarantee they are unreachable here; `None`
1551 // surfaces that to `kind()`, which aborts loudly via `.expect`
1552 // rather than misclassify the family (a wrong `FamilySpecKind`
1553 // would silently corrupt every downstream likelihood/gradient
1554 // evaluation). A banned `panic!`/`unreachable!` macro would be the
1555 // same divergence with worse provenance.
1556 _ => return None,
1557 })
1558 }
1559
1560 #[inline]
1561 pub fn is_binomial(&self) -> bool {
1562 self.kind().is_binomial()
1563 }
1564
1565 #[inline]
1566 pub fn is_gaussian_identity(&self) -> bool {
1567 self.kind().is_gaussian_identity()
1568 }
1569
1570 #[inline]
1571 pub fn is_royston_parmar(&self) -> bool {
1572 self.kind().is_royston_parmar()
1573 }
1574
1575 #[inline]
1576 pub fn is_latent_cloglog(&self) -> bool {
1577 self.kind().is_latent_cloglog()
1578 }
1579
1580 #[inline]
1581 pub fn is_binomial_mixture(&self) -> bool {
1582 self.kind().is_binomial_mixture()
1583 }
1584
1585 #[inline]
1586 pub fn is_binomial_sas(&self) -> bool {
1587 self.kind().is_binomial_sas()
1588 }
1589
1590 #[inline]
1591 pub fn is_binomial_beta_logistic(&self) -> bool {
1592 self.kind().is_binomial_beta_logistic()
1593 }
1594
1595 /// Default scale metadata for this (response, link).
1596 #[inline]
1597 pub fn default_scale_metadata(&self) -> LikelihoodScaleMetadata {
1598 match &self.response {
1599 ResponseFamily::Gaussian => LikelihoodScaleMetadata::ProfiledGaussian,
1600 ResponseFamily::Gamma => LikelihoodScaleMetadata::EstimatedGammaShape { shape: 1.0 },
1601 // Binomial and Poisson have `phi ≡ 1` (variance fully pinned by the
1602 // mean), so a fixed unit dispersion is correct.
1603 ResponseFamily::Binomial | ResponseFamily::Poisson => {
1604 LikelihoodScaleMetadata::FixedDispersion { phi: 1.0 }
1605 }
1606 // Negative-Binomial's overdispersion `theta` (`Var(y)=mu+mu^2/theta`)
1607 // is a genuine free parameter estimated jointly with the mean by
1608 // default — the family-variant `theta` is only the seed, refined from
1609 // the converged-η ML score during fitting, exactly like the Gamma
1610 // shape / Beta precision / Tweedie φ. Freezing it at the seed made
1611 // every variance-derived output (coefficient/η SEs, Wald and credible
1612 // intervals, predictive intervals, `generate` draws) ignore the
1613 // data's overdispersion (issue #802). `phi` itself stays `≡ 1`.
1614 //
1615 // A user-supplied `--negative-binomial-theta` is the opposite
1616 // contract (issue #983): `theta_fixed = true` routes to the
1617 // non-estimated scale variant, so the inner solver's refresh gate
1618 // (`negbin_theta_is_estimated()`) stays closed and the fit honours
1619 // the held value everywhere it enters.
1620 ResponseFamily::NegativeBinomial { theta, theta_fixed } => {
1621 if *theta_fixed {
1622 LikelihoodScaleMetadata::FixedNegBinTheta { theta: *theta }
1623 } else {
1624 LikelihoodScaleMetadata::EstimatedNegBinTheta { theta: *theta }
1625 }
1626 }
1627 // Tweedie's dispersion `phi` is a genuine free parameter
1628 // (`Var(y) = phi · mu^p`) and is estimated jointly with the mean by
1629 // default, exactly like the Gamma shape and Beta precision. The seed
1630 // `phi = 1` is refined from the converged-η Pearson residuals during
1631 // fitting (issue #771). Freezing it at 1 made every variance-derived
1632 // output (SEs, intervals, generate draws) ignore the data's spread.
1633 ResponseFamily::Tweedie { .. } => {
1634 LikelihoodScaleMetadata::EstimatedTweediePhi { phi: 1.0 }
1635 }
1636 // Beta precision is estimated jointly with the mean by default
1637 // (magic-by-default, issue #567): the family-variant `phi` is the
1638 // seed, refined from the working residuals during fitting.
1639 ResponseFamily::Beta { phi } => LikelihoodScaleMetadata::EstimatedBetaPhi { phi: *phi },
1640 ResponseFamily::RoystonParmar => LikelihoodScaleMetadata::Unspecified,
1641 }
1642 }
1643
1644 /// Human-readable label, routed through `FamilySpecKind`.
1645 #[inline]
1646 pub fn pretty_name(&self) -> &'static str {
1647 self.kind().pretty_name()
1648 }
1649
1650 /// Short identifier, routed through `FamilySpecKind`.
1651 #[inline]
1652 pub fn name(&self) -> &'static str {
1653 self.kind().name()
1654 }
1655
1656 #[inline]
1657 pub fn supports_firth(&self) -> bool {
1658 matches!(self.response, ResponseFamily::Binomial)
1659 && inverse_link_has_fisher_weight_jet(&self.link)
1660 }
1661
1662 /// Family-level fixed-dispersion contract. Returns the dispersion parameter
1663 /// `phi` that the GLM log-likelihood / weight expressions treat as fixed
1664 /// for the given `ResponseFamily`, or `None` when the family carries no
1665 /// fixed scale (profiled or jointly estimated).
1666 ///
1667 /// - `Gaussian` and `Gamma` profile/estimate the scale jointly with the
1668 /// mean, so no fixed `phi` is exposed here.
1669 /// - `Binomial` and `Poisson` are unit-scale exponential-family fits, so the
1670 /// contract is `Some(1.0)`. NegativeBinomial's overdispersion lives in
1671 /// `theta` (a separate parameter / flag), not in a free `phi`, so it also
1672 /// returns `Some(1.0)`.
1673 /// - `Tweedie { p }` carries its variance power on the family variant. Its
1674 /// free dispersion `phi` lives in `LikelihoodScaleMetadata` and is
1675 /// estimated by default (`EstimatedTweediePhi`, issue #771), so this
1676 /// family-level contract only exposes the unit seed used when callers ask
1677 /// the response family without scale metadata.
1678 /// - `Beta { phi }` carries its precision parameter directly on the family
1679 /// variant; the contract returns that exact value rather than the
1680 /// placeholder used elsewhere for unit-scale GLMs.
1681 /// - `RoystonParmar` has no GLM-style dispersion slot.
1682 #[inline]
1683 pub const fn fixed_dispersion(&self) -> Option<f64> {
1684 match self.response {
1685 ResponseFamily::Gaussian | ResponseFamily::Gamma | ResponseFamily::RoystonParmar => {
1686 None
1687 }
1688 ResponseFamily::Binomial
1689 | ResponseFamily::Poisson
1690 | ResponseFamily::Tweedie { .. }
1691 | ResponseFamily::NegativeBinomial { .. } => Some(1.0),
1692 ResponseFamily::Beta { phi } => Some(phi),
1693 }
1694 }
1695}
1696
1697#[inline]
1698pub const fn is_valid_tweedie_power(p: f64) -> bool {
1699 p.is_finite() && p > 1.0 && p < 2.0
1700}
1701
1702/// Error returned when an `InverseLink` cannot be paired with a particular
1703/// response family because the link is structurally unsupported for that
1704/// family. Carries the link name so call sites can produce a useful message
1705/// without losing the offending variant.
1706#[derive(Debug, Clone, PartialEq, Eq)]
1707pub struct UnsupportedLinkError {
1708 pub family: &'static str,
1709 pub link_name: String,
1710}
1711
1712impl UnsupportedLinkError {
1713 /// Construct an `UnsupportedLinkError` tagged with the response-family
1714 /// name (`"binomial"`, `"gaussian"`, ...) and a printable name for the
1715 /// offending `InverseLink` variant (extracted via the module-private
1716 /// `inverse_link_diagnostic_name`). No allocation beyond the link name.
1717 #[inline]
1718 pub fn new(family: &'static str, link: &InverseLink) -> Self {
1719 Self {
1720 family,
1721 link_name: inverse_link_diagnostic_name(link),
1722 }
1723 }
1724}
1725
1726impl std::fmt::Display for UnsupportedLinkError {
1727 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1728 write!(
1729 f,
1730 "inverse link `{}` is not supported by the {} response family",
1731 self.link_name, self.family
1732 )
1733 }
1734}
1735
1736impl std::error::Error for UnsupportedLinkError {}
1737
1738#[inline]
1739pub fn inverse_link_diagnostic_name(link: &InverseLink) -> String {
1740 match link {
1741 InverseLink::Standard(lf) => lf.name().to_string(),
1742 InverseLink::LatentCLogLog(_) => "latent-cloglog".to_string(),
1743 InverseLink::Sas(_) => "sas".to_string(),
1744 InverseLink::BetaLogistic(_) => "beta-logistic".to_string(),
1745 InverseLink::Mixture(_) => "mixture".to_string(),
1746 }
1747}
1748
1749/// Resolve a binomial-flavoured `LikelihoodSpec` from an `InverseLink`.
1750///
1751/// `StandardLink::Logit | Probit | CLogLog` and the state-bearing
1752/// `LatentCLogLog / Sas / BetaLogistic / Mixture` variants are accepted as
1753/// binomial-compatible. `StandardLink::Log | Identity` have no canonical
1754/// binomial meaning and return `UnsupportedLinkError`. Since
1755/// `InverseLink::Standard` carries `StandardLink` (not `LinkFunction`), the
1756/// previously-required `Standard(LinkFunction::Sas | BetaLogistic)` arm is
1757/// structurally impossible and has been removed.
1758#[inline]
1759pub fn inverse_link_to_binomial_spec(
1760 link: &InverseLink,
1761) -> Result<LikelihoodSpec, UnsupportedLinkError> {
1762 match link {
1763 InverseLink::Standard(StandardLink::Logit)
1764 | InverseLink::Standard(StandardLink::Probit)
1765 | InverseLink::Standard(StandardLink::CLogLog) => {
1766 Ok(LikelihoodSpec::new(ResponseFamily::Binomial, link.clone()))
1767 }
1768 InverseLink::LatentCLogLog(_)
1769 | InverseLink::Sas(_)
1770 | InverseLink::BetaLogistic(_)
1771 | InverseLink::Mixture(_) => {
1772 Ok(LikelihoodSpec::new(ResponseFamily::Binomial, link.clone()))
1773 }
1774 InverseLink::Standard(StandardLink::Log)
1775 | InverseLink::Standard(StandardLink::Identity) => {
1776 Err(UnsupportedLinkError::new("binomial", link))
1777 }
1778 }
1779}
1780
1781/// How a likelihood's scale parameter is handled by the fit/result contract.
1782#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
1783pub enum LikelihoodScaleMetadata {
1784 /// Gaussian identity fits profile sigma outside the fixed-scale GLM machinery.
1785 ProfiledGaussian,
1786 /// Fixed exponential-dispersion parameter `phi`.
1787 FixedDispersion { phi: f64 },
1788 /// Fixed Gamma shape `k`, equivalent to `phi = 1 / k`.
1789 FixedGammaShape { shape: f64 },
1790 /// Gamma shape `k` estimated jointly with the mean model.
1791 EstimatedGammaShape { shape: f64 },
1792 /// Beta-regression precision `phi` estimated jointly with the mean model.
1793 /// `Var(y) = mu(1-mu)/(1+phi)`; larger `phi` means less noise. Estimated
1794 /// from the working residuals after each mean fit and refreshed across outer
1795 /// iterations, exactly like the Gamma shape (issue #567).
1796 EstimatedBetaPhi { phi: f64 },
1797 /// Tweedie exponential-dispersion `phi` estimated jointly with the mean
1798 /// model. `Var(y) = phi · mu^p` with `phi` a genuine free parameter (unlike
1799 /// Binomial/Poisson, where `phi ≡ 1`). Estimated by the Pearson moment
1800 /// estimator `phî = Σ wᵢ (yᵢ − μᵢ)² / μᵢ^p / Σ wᵢ` at the converged η and
1801 /// refreshed across outer iterations, exactly like the Gamma shape and the
1802 /// Beta precision. `phi` enters the IRLS working weight `prior·μ^{2−p}/phi`,
1803 /// so the coefficient covariance `Vb = H⁻¹` already scales as `phi` and the
1804 /// reported SEs track `√phi` (issue #771).
1805 EstimatedTweediePhi { phi: f64 },
1806 /// Negative-Binomial overdispersion `theta` estimated jointly with the mean
1807 /// model. `Var(y) = mu + mu^2 / theta`; larger `theta` means less
1808 /// overdispersion (the Poisson limit is `theta → ∞`). Estimated by the
1809 /// maximum-likelihood `theta` score
1810 /// `Σ wᵢ[ψ(yᵢ+θ) − ψ(θ) + lnθ + 1 − ln(θ+μᵢ) − (yᵢ+θ)/(μᵢ+θ)] = 0` at the
1811 /// converged η (MASS `glm.nb`'s `theta.ml`) and refreshed across outer
1812 /// iterations, exactly like the Gamma shape / Beta precision / Tweedie φ.
1813 /// Unlike those, `theta` is *not* a dispersion scale `phi`: it enters only
1814 /// the IRLS working weight `W = μθ/(θ+μ)` (the full NB2 Fisher information),
1815 /// so the stored penalized Hessian is already the true one and the
1816 /// coefficient covariance `Vb = H⁻¹` takes no post-hoc multiply — `phi ≡ 1`
1817 /// for NB, the overdispersion lives in the variance function. The `theta`
1818 /// carried here mirrors `ResponseFamily::NegativeBinomial { theta }` (the
1819 /// canonical store every weight/deviance expression reads), kept in sync by
1820 /// `with_negbin_theta`, exactly as `EstimatedBetaPhi` mirrors `Beta { phi }`
1821 /// (issue #802).
1822 EstimatedNegBinTheta { theta: f64 },
1823 /// Negative-Binomial overdispersion `theta` held fixed at a user-supplied
1824 /// value (`--negative-binomial-theta`, issue #983). Identical role to
1825 /// `EstimatedNegBinTheta` in every weight / variance / covariance
1826 /// expression (`W = μθ/(θ+μ)`, `Var(y) = μ + μ²/θ`, `phi ≡ 1`), but the
1827 /// inner solver's ML refresh is gated off: the recorded `theta` is the
1828 /// user's, by construction. The fixed/estimated split mirrors
1829 /// `FixedGammaShape` vs `EstimatedGammaShape`.
1830 FixedNegBinTheta { theta: f64 },
1831 /// The engine does not expose fixed-scale semantics for this family.
1832 Unspecified,
1833}
1834
1835impl LikelihoodScaleMetadata {
1836 #[inline]
1837 pub const fn fixed_phi(self) -> Option<f64> {
1838 match self {
1839 Self::FixedDispersion { phi }
1840 | Self::EstimatedBetaPhi { phi }
1841 | Self::EstimatedTweediePhi { phi } => Some(phi),
1842 Self::FixedGammaShape { shape } | Self::EstimatedGammaShape { shape } => {
1843 Some(1.0 / shape)
1844 }
1845 // NB's dispersion scale is `phi ≡ 1` (the overdispersion is carried
1846 // by `theta` inside the variance function, not a scale multiply), so
1847 // the fixed-`phi` contract is `Some(1.0)` — NOT `theta`.
1848 Self::EstimatedNegBinTheta { .. } | Self::FixedNegBinTheta { .. } => Some(1.0),
1849 Self::ProfiledGaussian | Self::Unspecified => None,
1850 }
1851 }
1852
1853 /// Whether the Negative-Binomial overdispersion `theta` is estimated from
1854 /// data (the default for NB families, issue #802).
1855 #[inline]
1856 pub const fn negbin_theta_is_estimated(self) -> bool {
1857 matches!(self, Self::EstimatedNegBinTheta { .. })
1858 }
1859
1860 /// The Negative-Binomial `theta` carried in the scale metadata (estimated
1861 /// or user-fixed), or `None` for non-NB families.
1862 #[inline]
1863 pub const fn negbin_theta(self) -> Option<f64> {
1864 match self {
1865 Self::EstimatedNegBinTheta { theta } | Self::FixedNegBinTheta { theta } => Some(theta),
1866 _ => None,
1867 }
1868 }
1869
1870 /// Whether the Beta-regression precision `phi` is estimated from data.
1871 #[inline]
1872 pub const fn beta_phi_is_estimated(self) -> bool {
1873 matches!(self, Self::EstimatedBetaPhi { .. })
1874 }
1875
1876 /// Whether the Tweedie exponential-dispersion `phi` is estimated from data.
1877 #[inline]
1878 pub const fn tweedie_phi_is_estimated(self) -> bool {
1879 matches!(self, Self::EstimatedTweediePhi { .. })
1880 }
1881
1882 #[inline]
1883 pub const fn gamma_shape(self) -> Option<f64> {
1884 match self {
1885 Self::FixedGammaShape { shape } | Self::EstimatedGammaShape { shape } => Some(shape),
1886 _ => None,
1887 }
1888 }
1889
1890 #[inline]
1891 pub const fn gamma_shape_is_estimated(self) -> bool {
1892 matches!(self, Self::EstimatedGammaShape { .. })
1893 }
1894}
1895
1896/// Whether a stored log-likelihood includes response-only normalization constants.
1897#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1898pub enum LogLikelihoodNormalization {
1899 Full,
1900 OmittingResponseConstants,
1901 UserProvided,
1902}
1903
1904/// Explicit GLM likelihood specification: response/link spec plus scale semantics.
1905///
1906/// `spec` is the canonical `(ResponseFamily, InverseLink)` selector. `scale`
1907/// records how the scale parameter is handled (profiled Gaussian sigma, fixed
1908/// dispersion, fixed/estimated Gamma shape). The Gamma shape is mutated in
1909/// place during PIRLS via `with_gamma_shape`; preserving that field on this
1910/// struct is what lets the inner solver thread the estimated shape into
1911/// deviance / log-likelihood / weight evaluation without a separate side
1912/// channel.
1913#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1914pub struct GlmLikelihoodSpec {
1915 pub spec: LikelihoodSpec,
1916 pub scale: LikelihoodScaleMetadata,
1917}
1918
1919impl GlmLikelihoodSpec {
1920 /// Build a `GlmLikelihoodSpec` from a `LikelihoodSpec`, deriving the
1921 /// canonical default scale metadata for the response family.
1922 #[inline]
1923 pub fn canonical(spec: LikelihoodSpec) -> Self {
1924 let scale = spec.default_scale_metadata();
1925 Self { spec, scale }
1926 }
1927
1928 #[inline]
1929 pub fn link_function(&self) -> LinkFunction {
1930 self.spec.link_function()
1931 }
1932
1933 #[inline]
1934 pub fn fixed_phi(&self) -> Option<f64> {
1935 self.scale.fixed_phi()
1936 }
1937
1938 /// Multiplier converting the stored unscaled inverse penalized Hessian
1939 /// `H⁻¹` into the reported coefficient covariance `Vb = H⁻¹ · scale`.
1940 ///
1941 /// # Invariant
1942 ///
1943 /// `Vb` is the inverse of the Hessian of the *actual penalized objective the
1944 /// inner solver minimizes*. The stored Hessian is always assembled as
1945 /// `H = XᵀWX + S_λ`, with the penalty `S_λ` added **unscaled** (see
1946 /// `pirls::penalty::add_to_hessian`). Whether `H` is already that true
1947 /// objective Hessian — and hence whether any post-hoc dispersion multiply is
1948 /// warranted — is decided entirely by what the IRLS working weight `W`
1949 /// carries:
1950 ///
1951 /// * **Working weight already carries the reciprocal dispersion / full
1952 /// Fisher information.** Then `H = Xᵀ(W_sf/φ)X + S_λ` already equals the
1953 /// true penalized Hessian (e.g. mgcv's `XᵀW_sfX/φ + S_λ` for Gamma), so
1954 /// `Vb = H⁻¹` and the scale is exactly `1.0`. This is the case for Gamma
1955 /// (`W = prior·shape = prior/φ`), Tweedie (`W = prior·μ^{2−p}/φ`), Beta
1956 /// and Negative-Binomial (the working weight is the complete fixed-scale
1957 /// Fisher information), and the fixed-scale exponential families
1958 /// Poisson/Binomial (`φ ≡ 1`). Multiplying `H⁻¹` by the dispersion again
1959 /// for any of these double-counts it and shrinks every SE by `√dispersion`.
1960 ///
1961 /// * **Working weight is scale-free** (`W = priorweights`, the profiled
1962 /// Gaussian convention). Then the data term carries an implicit unit scale
1963 /// and `H = XᵀPX + S_λ` is the Hessian of `½·(scaled deviance)·σ²⁻¹`
1964 /// *without* the `σ²`. The correct covariance restores it:
1965 /// `Vb = H⁻¹ · σ̂²`. Only this branch returns a non-unit scale.
1966 ///
1967 /// `profiled_gaussian_phi` is the profiled residual variance `σ̂²` and is
1968 /// consulted **only** for the scale-free profiled-Gaussian branch; every
1969 /// other family ignores it. This deliberately does NOT touch
1970 /// `dispersion()` / `dispersion_from_likelihood`, which still report the
1971 /// response-level observation noise (`1/shape` for Gamma, `1/(1+φ)` for
1972 /// Beta, …) used by predictive-interval construction — a distinct quantity
1973 /// from the coefficient-covariance scale defined here.
1974 #[inline]
1975 pub fn coefficient_covariance_scale(&self, profiled_gaussian_phi: f64) -> f64 {
1976 match self.scale {
1977 // Scale-free working weight: restore the profiled variance.
1978 LikelihoodScaleMetadata::ProfiledGaussian => profiled_gaussian_phi,
1979 // Working weight already carries the dispersion / full Fisher
1980 // information, so the stored H is the true penalized Hessian and no
1981 // further dispersion multiply is warranted.
1982 //
1983 // FixedDispersion covers the explicitly-scaled Gaussian submodel
1984 // (W·=1/φ above) and Negative-Binomial; the Gamma, Beta and Tweedie
1985 // variants fold their reciprocal-dispersion / precision / φ into W
1986 // (Tweedie W = prior·μ^{2−p}/φ, so the SE already scales as √φ); and
1987 // Unspecified families never expose a separate post-hoc scale.
1988 LikelihoodScaleMetadata::FixedDispersion { .. }
1989 | LikelihoodScaleMetadata::FixedGammaShape { .. }
1990 | LikelihoodScaleMetadata::EstimatedGammaShape { .. }
1991 | LikelihoodScaleMetadata::EstimatedBetaPhi { .. }
1992 | LikelihoodScaleMetadata::EstimatedTweediePhi { .. }
1993 // Negative-Binomial folds `theta` into the working weight
1994 // `W = μθ/(θ+μ)` (the full NB2 Fisher information), so the stored
1995 // `H = XᵀWX + S_λ` is already the true penalized Hessian and the
1996 // covariance scale is `1.0` (`phi ≡ 1`). The reported SEs respond to
1997 // the data's overdispersion entirely through that `theta`-dependent
1998 // weight (issue #802) — multiplying again would double-count it.
1999 // The same holds verbatim for a user-fixed `theta` (issue #983).
2000 | LikelihoodScaleMetadata::EstimatedNegBinTheta { .. }
2001 | LikelihoodScaleMetadata::FixedNegBinTheta { .. }
2002 | LikelihoodScaleMetadata::Unspecified => 1.0,
2003 }
2004 }
2005
2006 #[inline]
2007 pub fn gamma_shape(&self) -> Option<f64> {
2008 self.scale.gamma_shape()
2009 }
2010
2011 /// Mutate the Gamma shape parameter in place while preserving the rest of
2012 /// the spec. The shape only takes effect for Gamma families; for other
2013 /// families the scale metadata is left untouched.
2014 #[inline]
2015 pub fn with_gamma_shape(mut self, shape: f64) -> Self {
2016 self.scale = match self.scale {
2017 LikelihoodScaleMetadata::FixedGammaShape { .. } => {
2018 LikelihoodScaleMetadata::FixedGammaShape { shape }
2019 }
2020 LikelihoodScaleMetadata::EstimatedGammaShape { .. } => {
2021 LikelihoodScaleMetadata::EstimatedGammaShape { shape }
2022 }
2023 other => match &self.spec.response {
2024 ResponseFamily::Gamma => LikelihoodScaleMetadata::EstimatedGammaShape { shape },
2025 _ => other,
2026 },
2027 };
2028 self
2029 }
2030
2031 /// Whether the Beta-regression precision `phi` is estimated from data.
2032 #[inline]
2033 pub fn beta_phi_is_estimated(&self) -> bool {
2034 self.scale.beta_phi_is_estimated()
2035 }
2036
2037 /// Mutate the Beta precision `phi` in place, on BOTH the family variant
2038 /// (where every PIRLS weight / deviance / log-likelihood expression reads it
2039 /// via `ResponseFamily::Beta { phi }`) and the scale metadata (the
2040 /// estimated-vs-fixed contract). No-op for non-Beta families. The inner
2041 /// solver calls this once per inner solve after a moment estimate of `phi`
2042 /// from the working residuals, so the IRLS weights `Var(y)=mu(1-mu)/(1+phi)`
2043 /// reflect the true precision rather than the `phi=1` seed (issue #567).
2044 #[inline]
2045 pub fn with_beta_phi(mut self, phi: f64) -> Self {
2046 if let ResponseFamily::Beta { phi: family_phi } = &mut self.spec.response {
2047 *family_phi = phi;
2048 self.scale = LikelihoodScaleMetadata::EstimatedBetaPhi { phi };
2049 }
2050 self
2051 }
2052
2053 /// Whether the Tweedie exponential-dispersion `phi` is estimated from data.
2054 #[inline]
2055 pub fn tweedie_phi_is_estimated(&self) -> bool {
2056 self.scale.tweedie_phi_is_estimated()
2057 }
2058
2059 /// Mutate the Tweedie dispersion `phi` in place. Unlike Beta, the Tweedie
2060 /// power `p` (not `phi`) is what is carried on the `ResponseFamily::Tweedie`
2061 /// variant; the dispersion lives purely in the scale metadata and is read by
2062 /// the IRLS weight (`prior·μ^{2−p}/phi`) through `fixed_phi()`. So updating
2063 /// the metadata here is sufficient to thread the estimated `phi` into every
2064 /// weight / covariance expression. No-op for non-Tweedie families (issue
2065 /// #771).
2066 #[inline]
2067 pub fn with_tweedie_phi(mut self, phi: f64) -> Self {
2068 if matches!(self.spec.response, ResponseFamily::Tweedie { .. }) {
2069 self.scale = LikelihoodScaleMetadata::EstimatedTweediePhi { phi };
2070 }
2071 self
2072 }
2073
2074 /// Whether the Negative-Binomial overdispersion `theta` is estimated from
2075 /// data (issue #802).
2076 #[inline]
2077 pub fn negbin_theta_is_estimated(&self) -> bool {
2078 self.scale.negbin_theta_is_estimated()
2079 }
2080
2081 /// Mutate the Negative-Binomial overdispersion `theta` in place, on BOTH the
2082 /// family variant (where every PIRLS weight / deviance / log-likelihood
2083 /// expression reads it via `ResponseFamily::NegativeBinomial { theta }`) and
2084 /// the scale metadata (the estimated-vs-fixed contract). No-op for non-NB
2085 /// families. The inner solver calls this once per inner solve after a
2086 /// maximum-likelihood estimate of `theta` from the working residuals, so the
2087 /// IRLS weight `W = μθ/(θ+μ)` and the variance `Var(y)=mu+mu^2/theta` reflect
2088 /// the data's overdispersion rather than the seed `theta` (issue #802). This
2089 /// mirrors `with_beta_phi` exactly — both keep the family variant and the
2090 /// scale metadata as two synchronized views of one estimated parameter.
2091 /// No-op for a user-fixed `theta` (`theta_fixed = true` /
2092 /// `FixedNegBinTheta`, issue #983): the held value is the contract, and
2093 /// this mutator must never let an estimation path overwrite it — the
2094 /// PIRLS refresh gate (`negbin_theta_is_estimated()`) already skips the
2095 /// call, this enforces the same invariant at the data itself.
2096 #[inline]
2097 pub fn with_negbin_theta(mut self, theta: f64) -> Self {
2098 if let ResponseFamily::NegativeBinomial {
2099 theta: family_theta,
2100 theta_fixed,
2101 } = &mut self.spec.response
2102 && !*theta_fixed
2103 {
2104 *family_theta = theta;
2105 self.scale = LikelihoodScaleMetadata::EstimatedNegBinTheta { theta };
2106 }
2107 self
2108 }
2109
2110 /// The estimated Negative-Binomial `theta`, read from the family variant
2111 /// (the canonical store), or `None` for non-NB families.
2112 #[inline]
2113 pub fn negbin_theta(&self) -> Option<f64> {
2114 match self.spec.response {
2115 ResponseFamily::NegativeBinomial { theta, .. } => Some(theta),
2116 _ => None,
2117 }
2118 }
2119
2120 /// Produce a copy of this spec with the Tweedie exponential-dispersion
2121 /// `phi` PINNED at `phi` for the duration of the smoothing-parameter (λ)
2122 /// search (#1477). Converts an `EstimatedTweediePhi` scale into the
2123 /// statistically-identical `FixedDispersion` form, which gates off the
2124 /// per-inner-solve Pearson refresh in
2125 /// `GamWorkingModel::update_with_curvature` (its guard is
2126 /// `tweedie_phi_is_estimated()`, which `FixedDispersion` does not satisfy)
2127 /// while leaving every weight / variance / covariance expression unchanged
2128 /// (they read `phi` through `fixed_phi()`, which `FixedDispersion` answers
2129 /// identically).
2130 ///
2131 /// Rationale: with `phi` estimated, the inner solver re-derives it from each
2132 /// outer iterate's *warm-start* η (the Pearson moment estimator
2133 /// `phî = Σ wᵢ(yᵢ−μᵢ)²/μᵢ^p / Σ wᵢ`). The Tweedie LAML omits the
2134 /// `phi`-dependent saddlepoint normalizer `a(y,φ)` from `−ℓ(β̂)` — valid only
2135 /// when `phi` is fixed across the surface — so a drifting `phi` makes
2136 /// `F(ρ)` a non-stationary function of ρ that REWARDS dispersion inflation:
2137 /// driving a double-penalty null-space `λ` up kills a genuinely-supported
2138 /// linear trend, the residuals grow, the warm-start `phî` rises, and the
2139 /// `[yθ−κ]/φ` deviance term shrinks with no compensating normalizer penalty,
2140 /// so the criterion falls and the outer optimizer rails `λ_null` to the box
2141 /// bound (the #1477 Tweedie double-penalty boundary blow-up). Holding `phi`
2142 /// fixed across the λ-search makes `F(ρ) = REML(ρ, φ_frozen)` a genuine
2143 /// stationary function of ρ, exactly as for the Gaussian profiled scale
2144 /// (whose `(n−Mp)/2·log(2πφ̂)` normalizer is retained) and as mgcv does for
2145 /// Tweedie. `phi` is still Pearson-refreshed at the single final reported fit
2146 /// (the `refine_dispersion_at_converged_eta = true` accept-fit). No-op for
2147 /// non-Tweedie families and for a user-fixed `phi`.
2148 #[inline]
2149 pub fn with_tweedie_phi_frozen_for_search(mut self, phi: f64) -> Self {
2150 if matches!(self.spec.response, ResponseFamily::Tweedie { .. })
2151 && self.scale.tweedie_phi_is_estimated()
2152 {
2153 self.scale = LikelihoodScaleMetadata::FixedDispersion { phi };
2154 }
2155 self
2156 }
2157
2158 /// Produce a copy of this spec with the Negative-Binomial overdispersion
2159 /// `theta` PINNED at `theta` for the duration of the smoothing-parameter
2160 /// (λ) search (#1082). Converts an `EstimatedNegBinTheta` spec into the
2161 /// statistically-identical `FixedNegBinTheta` form (`theta_fixed = true`),
2162 /// which gates off the per-inner-solve ML refresh in
2163 /// `GamWorkingModel::update_with_curvature` (its guard is
2164 /// `negbin_theta_is_estimated()`).
2165 ///
2166 /// Rationale: with θ estimated, the inner solver re-derives θ from each
2167 /// outer iterate's *warm-start* η, so θ — and hence the NB working response,
2168 /// deviance and penalty-logdet that feed the REML criterion — drifts every
2169 /// outer evaluation. The outer optimizer then chases a moving target and the
2170 /// projected-gradient convergence test never trips, grinding the loop to
2171 /// `max_iter` (the #1082 negative-binomial tensor timeout). Holding θ fixed
2172 /// across the λ-search makes the REML objective `F(ρ) = REML(ρ, θ_frozen)` a
2173 /// genuine stationary function of ρ, so the loop converges in a handful of
2174 /// iterations — and θ is still ML-refreshed at the single final, reported fit
2175 /// (the `refine_dispersion_at_converged_eta = true` accept-fit), exactly as
2176 /// the function-level docs require ("estimate the scale at the converged fit,
2177 /// not inside the λ search; mgcv likewise"). No-op for non-NB families and
2178 /// for an already user-fixed θ.
2179 #[inline]
2180 pub fn with_negbin_theta_frozen_for_search(mut self, theta: f64) -> Self {
2181 if let ResponseFamily::NegativeBinomial {
2182 theta: family_theta,
2183 theta_fixed,
2184 } = &mut self.spec.response
2185 {
2186 *family_theta = theta;
2187 *theta_fixed = true;
2188 self.scale = LikelihoodScaleMetadata::FixedNegBinTheta { theta };
2189 }
2190 self
2191 }
2192}
2193
2194#[cfg(test)]
2195mod tests {
2196 use super::*;
2197 use ndarray::arr1;
2198
2199 // -----------------------------------------------------------------------
2200 // CoefficientGroupPrior::validate
2201 // -----------------------------------------------------------------------
2202
2203 #[test]
2204 fn prior_flat_always_ok() {
2205 assert!(CoefficientGroupPrior::Flat.validate("ctx").is_ok());
2206 }
2207
2208 #[test]
2209 fn prior_normal_log_precision_valid() {
2210 assert!(
2211 CoefficientGroupPrior::NormalLogPrecision { mean: 0.0, sd: 1.0 }
2212 .validate("ctx")
2213 .is_ok()
2214 );
2215 }
2216
2217 #[test]
2218 fn prior_normal_log_precision_infinite_mean_errors() {
2219 assert!(
2220 CoefficientGroupPrior::NormalLogPrecision { mean: f64::INFINITY, sd: 1.0 }
2221 .validate("ctx")
2222 .is_err()
2223 );
2224 }
2225
2226 #[test]
2227 fn prior_normal_log_precision_zero_sd_errors() {
2228 assert!(
2229 CoefficientGroupPrior::NormalLogPrecision { mean: 0.0, sd: 0.0 }
2230 .validate("ctx")
2231 .is_err()
2232 );
2233 }
2234
2235 #[test]
2236 fn prior_normal_log_precision_negative_sd_errors() {
2237 assert!(
2238 CoefficientGroupPrior::NormalLogPrecision { mean: 0.0, sd: -1.0 }
2239 .validate("ctx")
2240 .is_err()
2241 );
2242 }
2243
2244 #[test]
2245 fn prior_gamma_precision_valid() {
2246 assert!(
2247 CoefficientGroupPrior::GammaPrecision { shape: 1.0, rate: 0.0 }
2248 .validate("ctx")
2249 .is_ok()
2250 );
2251 }
2252
2253 #[test]
2254 fn prior_gamma_precision_zero_shape_errors() {
2255 assert!(
2256 CoefficientGroupPrior::GammaPrecision { shape: 0.0, rate: 1.0 }
2257 .validate("ctx")
2258 .is_err()
2259 );
2260 }
2261
2262 #[test]
2263 fn prior_gamma_precision_negative_rate_errors() {
2264 assert!(
2265 CoefficientGroupPrior::GammaPrecision { shape: 1.0, rate: -0.1 }
2266 .validate("ctx")
2267 .is_err()
2268 );
2269 }
2270
2271 #[test]
2272 fn prior_penalized_complexity_valid() {
2273 assert!(
2274 CoefficientGroupPrior::PenalizedComplexity { upper: 1.0, tail_prob: 0.05 }
2275 .validate("ctx")
2276 .is_ok()
2277 );
2278 }
2279
2280 #[test]
2281 fn prior_penalized_complexity_zero_upper_errors() {
2282 assert!(
2283 CoefficientGroupPrior::PenalizedComplexity { upper: 0.0, tail_prob: 0.05 }
2284 .validate("ctx")
2285 .is_err()
2286 );
2287 }
2288
2289 #[test]
2290 fn prior_penalized_complexity_tail_prob_zero_errors() {
2291 assert!(
2292 CoefficientGroupPrior::PenalizedComplexity { upper: 1.0, tail_prob: 0.0 }
2293 .validate("ctx")
2294 .is_err()
2295 );
2296 }
2297
2298 #[test]
2299 fn prior_penalized_complexity_tail_prob_one_errors() {
2300 assert!(
2301 CoefficientGroupPrior::PenalizedComplexity { upper: 1.0, tail_prob: 1.0 }
2302 .validate("ctx")
2303 .is_err()
2304 );
2305 }
2306
2307 // -----------------------------------------------------------------------
2308 // LatentCLogLogState::new
2309 // -----------------------------------------------------------------------
2310
2311 #[test]
2312 fn latent_cloglog_zero_sd_ok() {
2313 assert!(LatentCLogLogState::new(0.0).is_ok());
2314 }
2315
2316 #[test]
2317 fn latent_cloglog_positive_sd_ok() {
2318 assert!(LatentCLogLogState::new(1.5).is_ok());
2319 }
2320
2321 #[test]
2322 fn latent_cloglog_negative_sd_errors() {
2323 assert!(LatentCLogLogState::new(-0.1).is_err());
2324 }
2325
2326 #[test]
2327 fn latent_cloglog_infinite_sd_errors() {
2328 assert!(LatentCLogLogState::new(f64::INFINITY).is_err());
2329 }
2330
2331 #[test]
2332 fn latent_cloglog_nan_errors() {
2333 assert!(LatentCLogLogState::new(f64::NAN).is_err());
2334 }
2335
2336 // -----------------------------------------------------------------------
2337 // WigglePenaltyConfig::cubic_triple_operator_default
2338 // -----------------------------------------------------------------------
2339
2340 #[test]
2341 fn wiggle_penalty_default_fields() {
2342 let cfg = WigglePenaltyConfig::cubic_triple_operator_default();
2343 assert_eq!(cfg.degree, 3);
2344 assert_eq!(cfg.num_internal_knots, 8);
2345 assert_eq!(cfg.penalty_orders, vec![1, 2, 3]);
2346 assert!(cfg.double_penalty);
2347 assert!((cfg.monotonicity_eps - 1e-4).abs() < 1e-15);
2348 }
2349
2350 // -----------------------------------------------------------------------
2351 // is_valid_tweedie_power
2352 // -----------------------------------------------------------------------
2353
2354 #[test]
2355 fn tweedie_power_valid_interior() {
2356 assert!(is_valid_tweedie_power(1.5));
2357 assert!(is_valid_tweedie_power(1.1));
2358 assert!(is_valid_tweedie_power(1.9));
2359 }
2360
2361 #[test]
2362 fn tweedie_power_boundaries_invalid() {
2363 assert!(!is_valid_tweedie_power(1.0));
2364 assert!(!is_valid_tweedie_power(2.0));
2365 }
2366
2367 #[test]
2368 fn tweedie_power_outside_interval_invalid() {
2369 assert!(!is_valid_tweedie_power(0.5));
2370 assert!(!is_valid_tweedie_power(2.5));
2371 assert!(!is_valid_tweedie_power(-1.0));
2372 assert!(!is_valid_tweedie_power(f64::INFINITY));
2373 }
2374
2375 // -----------------------------------------------------------------------
2376 // StandardLink <-> LinkFunction conversions
2377 // -----------------------------------------------------------------------
2378
2379 #[test]
2380 fn standard_link_roundtrip_to_link_function() {
2381 assert_eq!(StandardLink::Logit.as_link_function(), LinkFunction::Logit);
2382 assert_eq!(StandardLink::Probit.as_link_function(), LinkFunction::Probit);
2383 assert_eq!(StandardLink::CLogLog.as_link_function(), LinkFunction::CLogLog);
2384 assert_eq!(StandardLink::Identity.as_link_function(), LinkFunction::Identity);
2385 assert_eq!(StandardLink::Log.as_link_function(), LinkFunction::Log);
2386 }
2387
2388 #[test]
2389 fn standard_link_from_link_function_state_bearing_errors() {
2390 assert!(StandardLink::try_from(LinkFunction::Sas).is_err());
2391 assert!(StandardLink::try_from(LinkFunction::BetaLogistic).is_err());
2392 }
2393
2394 #[test]
2395 fn standard_link_from_link_function_standard_ok() {
2396 assert_eq!(StandardLink::try_from(LinkFunction::Logit), Ok(StandardLink::Logit));
2397 assert_eq!(StandardLink::try_from(LinkFunction::Log), Ok(StandardLink::Log));
2398 }
2399
2400 // -----------------------------------------------------------------------
2401 // LikelihoodSpec: legal-cell matrix
2402 // -----------------------------------------------------------------------
2403
2404 #[test]
2405 fn legal_cells_accepted() {
2406 assert!(LikelihoodSpec::try_new(
2407 ResponseFamily::Gaussian,
2408 InverseLink::Standard(StandardLink::Identity)
2409 )
2410 .is_ok());
2411 assert!(LikelihoodSpec::try_new(
2412 ResponseFamily::Poisson,
2413 InverseLink::Standard(StandardLink::Log)
2414 )
2415 .is_ok());
2416 assert!(LikelihoodSpec::try_new(
2417 ResponseFamily::Gamma,
2418 InverseLink::Standard(StandardLink::Log)
2419 )
2420 .is_ok());
2421 assert!(LikelihoodSpec::try_new(
2422 ResponseFamily::Beta { phi: 1.0 },
2423 InverseLink::Standard(StandardLink::Logit)
2424 )
2425 .is_ok());
2426 assert!(LikelihoodSpec::try_new(
2427 ResponseFamily::Binomial,
2428 InverseLink::Standard(StandardLink::Logit)
2429 )
2430 .is_ok());
2431 assert!(LikelihoodSpec::try_new(
2432 ResponseFamily::Binomial,
2433 InverseLink::Standard(StandardLink::Probit)
2434 )
2435 .is_ok());
2436 assert!(LikelihoodSpec::try_new(
2437 ResponseFamily::Binomial,
2438 InverseLink::Standard(StandardLink::CLogLog)
2439 )
2440 .is_ok());
2441 }
2442
2443 #[test]
2444 fn illegal_cells_rejected() {
2445 assert!(LikelihoodSpec::try_new(
2446 ResponseFamily::Poisson,
2447 InverseLink::Standard(StandardLink::Identity)
2448 )
2449 .is_err());
2450 assert!(LikelihoodSpec::try_new(
2451 ResponseFamily::Gaussian,
2452 InverseLink::Standard(StandardLink::Logit)
2453 )
2454 .is_err());
2455 assert!(LikelihoodSpec::try_new(
2456 ResponseFamily::Binomial,
2457 InverseLink::Standard(StandardLink::Log)
2458 )
2459 .is_err());
2460 assert!(LikelihoodSpec::try_new(
2461 ResponseFamily::Binomial,
2462 InverseLink::Standard(StandardLink::Identity)
2463 )
2464 .is_err());
2465 }
2466
2467 #[test]
2468 fn likelihood_spec_kind_names() {
2469 assert_eq!(LikelihoodSpec::gaussian_identity().name(), "gaussian");
2470 assert_eq!(LikelihoodSpec::poisson_log().name(), "poisson-log");
2471 assert_eq!(LikelihoodSpec::binomial_logit().name(), "binomial-logit");
2472 assert_eq!(LikelihoodSpec::gamma_log().name(), "gamma-log");
2473 }
2474
2475 // -----------------------------------------------------------------------
2476 // ResponseFamily::infer_from_response
2477 // -----------------------------------------------------------------------
2478
2479 #[test]
2480 fn infer_binary_kind_gives_binomial() {
2481 let y = arr1(&[0.0_f64, 1.0]);
2482 let result = ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Binary);
2483 assert!(matches!(result, Ok(ResponseFamily::Binomial)));
2484 }
2485
2486 #[test]
2487 fn infer_categorical_kind_refuses() {
2488 let y = arr1(&[0.0_f64, 1.0]);
2489 let result = ResponseFamily::infer_from_response(
2490 y.view(),
2491 ResponseColumnKind::Categorical { levels: vec!["yes".to_string(), "no".to_string()] },
2492 );
2493 assert!(result.is_err());
2494 }
2495
2496 #[test]
2497 fn infer_numeric_binary_values_gives_binomial() {
2498 let y = arr1(&[0.0_f64, 1.0, 0.0, 1.0, 0.0]);
2499 let result =
2500 ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2501 assert!(matches!(result, Ok(ResponseFamily::Binomial)));
2502 }
2503
2504 #[test]
2505 fn infer_numeric_count_values_gives_poisson() {
2506 let y = arr1(&[0.0_f64, 1.0, 2.0, 3.0, 5.0]);
2507 let result =
2508 ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2509 assert!(matches!(result, Ok(ResponseFamily::Poisson)));
2510 }
2511
2512 #[test]
2513 fn infer_numeric_fractional_gives_gaussian() {
2514 let y = arr1(&[1.5_f64, 2.3, 3.7]);
2515 let result =
2516 ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2517 assert!(matches!(result, Ok(ResponseFamily::Gaussian)));
2518 }
2519
2520 #[test]
2521 fn infer_numeric_negative_gives_gaussian() {
2522 let y = arr1(&[-1.0_f64, 0.0, 1.0]);
2523 let result =
2524 ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2525 assert!(matches!(result, Ok(ResponseFamily::Gaussian)));
2526 }
2527
2528 // -----------------------------------------------------------------------
2529 // ResponseFamily::validate_response_support
2530 // -----------------------------------------------------------------------
2531
2532 #[test]
2533 fn gaussian_support_accepts_any_finite() {
2534 let y = arr1(&[-100.0_f64, 0.0, 100.0]);
2535 assert!(ResponseFamily::Gaussian.validate_response_support(y.view()).is_ok());
2536 }
2537
2538 #[test]
2539 fn gamma_support_rejects_zero() {
2540 let y = arr1(&[0.0_f64, 1.0, 2.0]);
2541 assert!(ResponseFamily::Gamma.validate_response_support(y.view()).is_err());
2542 }
2543
2544 #[test]
2545 fn gamma_support_rejects_negative() {
2546 let y = arr1(&[-1.0_f64, 1.0]);
2547 assert!(ResponseFamily::Gamma.validate_response_support(y.view()).is_err());
2548 }
2549
2550 #[test]
2551 fn gamma_support_accepts_positive() {
2552 let y = arr1(&[0.1_f64, 1.0, 100.0]);
2553 assert!(ResponseFamily::Gamma.validate_response_support(y.view()).is_ok());
2554 }
2555
2556 #[test]
2557 fn binomial_support_rejects_fractional() {
2558 let y = arr1(&[0.0_f64, 0.5, 1.0]);
2559 assert!(ResponseFamily::Binomial.validate_response_support(y.view()).is_err());
2560 }
2561
2562 #[test]
2563 fn binomial_support_accepts_binary() {
2564 let y = arr1(&[0.0_f64, 1.0, 0.0, 1.0]);
2565 assert!(ResponseFamily::Binomial.validate_response_support(y.view()).is_ok());
2566 }
2567
2568 #[test]
2569 fn poisson_support_rejects_negative() {
2570 let y = arr1(&[-1.0_f64, 0.0, 1.0]);
2571 assert!(ResponseFamily::Poisson.validate_response_support(y.view()).is_err());
2572 }
2573
2574 #[test]
2575 fn poisson_support_accepts_nonneg() {
2576 let y = arr1(&[0.0_f64, 1.0, 2.0, 10.0]);
2577 assert!(ResponseFamily::Poisson.validate_response_support(y.view()).is_ok());
2578 }
2579
2580 #[test]
2581 fn beta_support_rejects_zero_boundary() {
2582 let y = arr1(&[0.0_f64, 0.5]);
2583 assert!(ResponseFamily::Beta { phi: 1.0 }.validate_response_support(y.view()).is_err());
2584 }
2585
2586 #[test]
2587 fn beta_support_rejects_one_boundary() {
2588 let y = arr1(&[0.5_f64, 1.0]);
2589 assert!(ResponseFamily::Beta { phi: 1.0 }.validate_response_support(y.view()).is_err());
2590 }
2591
2592 #[test]
2593 fn beta_support_accepts_open_interval() {
2594 let y = arr1(&[0.1_f64, 0.5, 0.9]);
2595 assert!(ResponseFamily::Beta { phi: 1.0 }.validate_response_support(y.view()).is_ok());
2596 }
2597
2598 // -----------------------------------------------------------------------
2599 // ResponseFamily::validate_response_degeneracy
2600 // -----------------------------------------------------------------------
2601
2602 #[test]
2603 fn binomial_degeneracy_all_zeros_errors() {
2604 let y = arr1(&[0.0_f64, 0.0, 0.0]);
2605 assert!(ResponseFamily::Binomial.validate_response_degeneracy(y.view()).is_err());
2606 }
2607
2608 #[test]
2609 fn binomial_degeneracy_all_ones_errors() {
2610 let y = arr1(&[1.0_f64, 1.0, 1.0]);
2611 assert!(ResponseFamily::Binomial.validate_response_degeneracy(y.view()).is_err());
2612 }
2613
2614 #[test]
2615 fn binomial_degeneracy_mixed_ok() {
2616 let y = arr1(&[0.0_f64, 1.0, 0.0]);
2617 assert!(ResponseFamily::Binomial.validate_response_degeneracy(y.view()).is_ok());
2618 }
2619
2620 #[test]
2621 fn gaussian_degeneracy_exactly_constant_errors() {
2622 // An exactly-constant response has sample sd 0 ⇒ the marginal
2623 // `−n/2·log σ²` diverges; the guard must reject it pre-fit (#332).
2624 let y = arr1(&[1.0_f64, 1.0, 1.0]);
2625 let err = ResponseFamily::Gaussian
2626 .validate_response_degeneracy(y.view())
2627 .expect_err("exactly-constant Gaussian response must be rejected");
2628 assert!(matches!(
2629 err.kind,
2630 ResponseDegeneracyKind::GaussianNearConstant { .. }
2631 ));
2632 }
2633
2634 #[test]
2635 fn gaussian_degeneracy_near_constant_reproducer_errors() {
2636 // The issue reproducer: a response with sd ~ 1e-13, well below the
2637 // `1e-10` floor, so the REML score blows up to +inf without the guard.
2638 let y = arr1(&[
2639 5.0_f64,
2640 5.0 + 1.0e-13,
2641 5.0 - 1.0e-13,
2642 5.0 + 2.0e-13,
2643 5.0 - 2.0e-13,
2644 ]);
2645 let err = ResponseFamily::Gaussian
2646 .validate_response_degeneracy(y.view())
2647 .expect_err("near-constant Gaussian response must be rejected");
2648 match err.kind {
2649 ResponseDegeneracyKind::GaussianNearConstant { sample_sd, min_sd } => {
2650 assert!(
2651 sample_sd <= min_sd,
2652 "guard must fire only when sample_sd ({sample_sd:.3e}) <= min_sd ({min_sd:.0e})"
2653 );
2654 assert_eq!(min_sd, GAUSSIAN_MIN_SAMPLE_SD);
2655 // The message quotes both numbers verbatim.
2656 let msg = err.message_for("y");
2657 assert!(msg.contains("effectively constant"), "msg = {msg}");
2658 }
2659 other => panic!("expected GaussianNearConstant, got {other:?}"),
2660 }
2661 }
2662
2663 #[test]
2664 fn gaussian_degeneracy_well_conditioned_ok() {
2665 // A genuinely varying response (sd ~ O(1)) is never tripped.
2666 let y = arr1(&[-2.0_f64, 0.5, 1.7, 3.0, -1.1, 2.2]);
2667 assert!(ResponseFamily::Gaussian
2668 .validate_response_degeneracy(y.view())
2669 .is_ok());
2670 }
2671
2672 #[test]
2673 fn gaussian_degeneracy_small_signal_above_floor_ok() {
2674 // sd ~ 1e-6 is small but far above the 1e-10 floor: a legitimately
2675 // small-but-real signal (e.g. a finely-resolved measurement) must fit,
2676 // so the guard must not over-reject.
2677 let y = arr1(&[1.0_f64, 1.0 + 1.0e-6, 1.0 - 1.0e-6, 1.0 + 2.0e-6]);
2678 assert!(ResponseFamily::Gaussian
2679 .validate_response_degeneracy(y.view())
2680 .is_ok());
2681 }
2682
2683 #[test]
2684 fn gaussian_degeneracy_single_observation_ok() {
2685 // Fewer than two observations carries no estimable scale degeneracy;
2686 // the sample-size gate handles too-small data separately.
2687 let y = arr1(&[42.0_f64]);
2688 assert!(ResponseFamily::Gaussian
2689 .validate_response_degeneracy(y.view())
2690 .is_ok());
2691 }
2692
2693 // -----------------------------------------------------------------------
2694 // ResponseFamily::mean_clamp_bounds / response_support_bounds
2695 // -----------------------------------------------------------------------
2696
2697 #[test]
2698 fn mean_clamp_bounds_binomial_unit_interval() {
2699 assert_eq!(ResponseFamily::Binomial.mean_clamp_bounds(), Some((0.0, 1.0)));
2700 }
2701
2702 #[test]
2703 fn mean_clamp_bounds_gaussian_none() {
2704 assert_eq!(ResponseFamily::Gaussian.mean_clamp_bounds(), None);
2705 }
2706
2707 #[test]
2708 fn mean_clamp_bounds_poisson_none() {
2709 assert_eq!(ResponseFamily::Poisson.mean_clamp_bounds(), None);
2710 }
2711
2712 #[test]
2713 fn response_support_bounds_gamma_nonneg_to_inf() {
2714 assert_eq!(
2715 ResponseFamily::Gamma.response_support_bounds(),
2716 Some((0.0, f64::INFINITY))
2717 );
2718 }
2719
2720 #[test]
2721 fn response_support_bounds_binomial_unit_interval() {
2722 assert_eq!(ResponseFamily::Binomial.response_support_bounds(), Some((0.0, 1.0)));
2723 }
2724
2725 #[test]
2726 fn response_support_bounds_gaussian_none() {
2727 assert_eq!(ResponseFamily::Gaussian.response_support_bounds(), None);
2728 }
2729
2730 // -----------------------------------------------------------------------
2731 // ResponseSupportViolation::message_for
2732 // -----------------------------------------------------------------------
2733
2734 #[test]
2735 fn violation_message_names_column() {
2736 let y = arr1(&[-1.0_f64]);
2737 let err = ResponseFamily::Gamma
2738 .validate_response_support(y.view())
2739 .unwrap_err();
2740 let msg = err.message_for("my_column");
2741 assert!(msg.contains("my_column"), "message: {msg}");
2742 assert!(msg.contains("Gamma"), "message: {msg}");
2743 }
2744
2745 // -----------------------------------------------------------------------
2746 // inverse_link_to_binomial_spec
2747 // -----------------------------------------------------------------------
2748
2749 #[test]
2750 fn binomial_spec_from_logit_link_ok() {
2751 let link = InverseLink::Standard(StandardLink::Logit);
2752 assert!(inverse_link_to_binomial_spec(&link).is_ok());
2753 }
2754
2755 #[test]
2756 fn binomial_spec_from_log_link_errors() {
2757 let link = InverseLink::Standard(StandardLink::Log);
2758 assert!(inverse_link_to_binomial_spec(&link).is_err());
2759 }
2760
2761 #[test]
2762 fn binomial_spec_from_identity_link_errors() {
2763 let link = InverseLink::Standard(StandardLink::Identity);
2764 assert!(inverse_link_to_binomial_spec(&link).is_err());
2765 }
2766
2767 // -----------------------------------------------------------------------
2768 // FamilySpecKind::name / pretty_name
2769 // -----------------------------------------------------------------------
2770
2771 #[test]
2772 fn family_spec_kind_name_non_binomial_variants() {
2773 assert_eq!(FamilySpecKind::GaussianIdentity.name(), "gaussian");
2774 assert_eq!(FamilySpecKind::PoissonLog.name(), "poisson-log");
2775 assert_eq!(FamilySpecKind::GammaLog.name(), "gamma-log");
2776 assert_eq!(FamilySpecKind::TweedieLog { p: 1.5 }.name(), "tweedie-log");
2777 assert_eq!(
2778 FamilySpecKind::NegativeBinomialLog { theta: 2.0 }.name(),
2779 "negative-binomial-log"
2780 );
2781 assert_eq!(FamilySpecKind::BetaLogit { phi: 5.0 }.name(), "beta-regression-logit");
2782 assert_eq!(FamilySpecKind::RoystonParmar.name(), "royston-parmar");
2783 }
2784
2785 #[test]
2786 fn family_spec_kind_name_binomial_variants() {
2787 assert_eq!(FamilySpecKind::BinomialLogit.name(), "binomial-logit");
2788 assert_eq!(FamilySpecKind::BinomialProbit.name(), "binomial-probit");
2789 assert_eq!(FamilySpecKind::BinomialCLogLog.name(), "binomial-cloglog");
2790 }
2791
2792 #[test]
2793 fn family_spec_kind_pretty_name_gaussian() {
2794 assert_eq!(FamilySpecKind::GaussianIdentity.pretty_name(), "Gaussian Identity");
2795 }
2796
2797 #[test]
2798 fn family_spec_kind_pretty_name_binomial_logit() {
2799 assert_eq!(FamilySpecKind::BinomialLogit.pretty_name(), "Binomial Logit");
2800 }
2801
2802 // -----------------------------------------------------------------------
2803 // FamilySpecKind::is_binomial and companions
2804 // -----------------------------------------------------------------------
2805
2806 #[test]
2807 fn is_binomial_true_for_all_binomial_variants() {
2808 assert!(FamilySpecKind::BinomialLogit.is_binomial());
2809 assert!(FamilySpecKind::BinomialProbit.is_binomial());
2810 assert!(FamilySpecKind::BinomialCLogLog.is_binomial());
2811 }
2812
2813 #[test]
2814 fn is_binomial_false_for_non_binomial_variants() {
2815 assert!(!FamilySpecKind::GaussianIdentity.is_binomial());
2816 assert!(!FamilySpecKind::PoissonLog.is_binomial());
2817 assert!(!FamilySpecKind::GammaLog.is_binomial());
2818 assert!(!FamilySpecKind::RoystonParmar.is_binomial());
2819 assert!(!FamilySpecKind::TweedieLog { p: 1.5 }.is_binomial());
2820 assert!(!FamilySpecKind::NegativeBinomialLog { theta: 1.0 }.is_binomial());
2821 assert!(!FamilySpecKind::BetaLogit { phi: 1.0 }.is_binomial());
2822 }
2823
2824 #[test]
2825 fn is_gaussian_identity_true_only_for_gaussian() {
2826 assert!(FamilySpecKind::GaussianIdentity.is_gaussian_identity());
2827 assert!(!FamilySpecKind::PoissonLog.is_gaussian_identity());
2828 assert!(!FamilySpecKind::BinomialLogit.is_gaussian_identity());
2829 }
2830
2831 #[test]
2832 fn is_royston_parmar_true_only_for_royston_parmar() {
2833 assert!(FamilySpecKind::RoystonParmar.is_royston_parmar());
2834 assert!(!FamilySpecKind::GaussianIdentity.is_royston_parmar());
2835 assert!(!FamilySpecKind::BinomialLogit.is_royston_parmar());
2836 }
2837
2838 #[test]
2839 fn supports_firth_iff_is_binomial() {
2840 assert!(FamilySpecKind::BinomialLogit.supports_firth());
2841 assert!(FamilySpecKind::BinomialProbit.supports_firth());
2842 assert!(FamilySpecKind::BinomialCLogLog.supports_firth());
2843 assert!(!FamilySpecKind::GaussianIdentity.supports_firth());
2844 assert!(!FamilySpecKind::PoissonLog.supports_firth());
2845 assert!(!FamilySpecKind::GammaLog.supports_firth());
2846 assert!(!FamilySpecKind::RoystonParmar.supports_firth());
2847 }
2848}