gam_spec/
lib.rs

1use ndarray::{Array1, ArrayView1};
2use serde::{Deserialize, Serialize};
3
4/// Hyperprior placed on a coefficient group's precision / log-precision.
5#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
6pub enum CoefficientGroupPrior {
7    Flat,
8    NormalLogPrecision {
9        mean: f64,
10        sd: f64,
11    },
12    GammaPrecision {
13        shape: f64,
14        rate: f64,
15    },
16    /// Penalized-complexity prior calibrated by `P(exp(-rho/2) > upper) =
17    /// tail_prob`; see [`RhoPrior::PenalizedComplexity`].
18    PenalizedComplexity {
19        upper: f64,
20        tail_prob: f64,
21    },
22}
23
24impl CoefficientGroupPrior {
25    pub fn to_rho_prior(&self) -> RhoPrior {
26        match *self {
27            Self::Flat => RhoPrior::Flat,
28            Self::NormalLogPrecision { mean, sd } => RhoPrior::Normal { mean, sd },
29            Self::GammaPrecision { shape, rate } => RhoPrior::GammaPrecision { shape, rate },
30            Self::PenalizedComplexity { upper, tail_prob } => {
31                RhoPrior::PenalizedComplexity { upper, tail_prob }
32            }
33        }
34    }
35
36    pub fn validate(&self, context: &str) -> Result<(), String> {
37        match *self {
38            Self::Flat => Ok(()),
39            Self::NormalLogPrecision { mean, sd } => {
40                if !mean.is_finite() {
41                    return Err(format!(
42                        "{context} Normal log-precision prior requires finite mean, got {mean}"
43                    ));
44                }
45                if !sd.is_finite() || sd <= 0.0 {
46                    return Err(format!(
47                        "{context} Normal log-precision prior requires sd > 0, got {sd}"
48                    ));
49                }
50                Ok(())
51            }
52            Self::GammaPrecision { shape, rate } => {
53                if !shape.is_finite() || shape <= 0.0 {
54                    return Err(format!(
55                        "{context} Gamma precision prior requires shape > 0, got {shape}"
56                    ));
57                }
58                if !rate.is_finite() || rate < 0.0 {
59                    return Err(format!(
60                        "{context} Gamma precision prior requires rate >= 0, got {rate}"
61                    ));
62                }
63                Ok(())
64            }
65            Self::PenalizedComplexity { upper, tail_prob } => {
66                if !upper.is_finite() || upper <= 0.0 {
67                    return Err(format!(
68                        "{context} penalized-complexity prior requires upper > 0, got {upper}"
69                    ));
70                }
71                if !tail_prob.is_finite() || tail_prob <= 0.0 || tail_prob >= 1.0 {
72                    return Err(format!(
73                        "{context} penalized-complexity prior requires tail probability in (0, 1), got {tail_prob}"
74                    ));
75                }
76                Ok(())
77            }
78        }
79    }
80}
81
82/// Shared default for monotone wiggle/deviation blocks. Formula DSL defaults,
83/// workflow configs, and runtime deviation blocks should all derive from this
84/// type so reproducible presets do not drift across layers.
85#[derive(Clone, Debug, Serialize, Deserialize)]
86pub struct WigglePenaltyConfig {
87    pub degree: usize,
88    pub num_internal_knots: usize,
89    pub penalty_orders: Vec<usize>,
90    pub double_penalty: bool,
91    pub monotonicity_eps: f64,
92}
93
94impl WigglePenaltyConfig {
95    pub fn cubic_triple_operator_default() -> Self {
96        Self {
97            degree: 3,
98            num_internal_knots: 8,
99            penalty_orders: vec![1, 2, 3],
100            double_penalty: true,
101            monotonicity_eps: 1e-4,
102        }
103    }
104}
105
106/// Shared engine-level link selector for generalized models. This is the
107/// "wide" link descriptor: CLI parsing, formula DSL, and the projection from
108/// `InverseLink::link_function()` all live in this enum, so it carries every
109/// link kind the engine knows about — including the state-bearing
110/// `Sas` / `BetaLogistic` cases.
111///
112/// `LinkFunction` is *not* the right type for the state-less `InverseLink::Standard`
113/// cell. Use [`StandardLink`] there: the type system then refuses to construct
114/// a state-less `Standard(Sas)` / `Standard(BetaLogistic)` placeholder.
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
116pub enum LinkFunction {
117    Logit,
118    Probit,
119    CLogLog,
120    Sas,
121    BetaLogistic,
122    Identity,
123    Log,
124}
125
126impl LinkFunction {
127    #[inline]
128    pub const fn name(self) -> &'static str {
129        match self {
130            Self::Logit => "logit",
131            Self::Probit => "probit",
132            Self::CLogLog => "cloglog",
133            Self::Sas => "sas",
134            Self::BetaLogistic => "beta-logistic",
135            Self::Identity => "identity",
136            Self::Log => "log",
137        }
138    }
139}
140
141/// Legal-only link descriptor for the state-less `InverseLink::Standard` cell.
142///
143/// `Sas` / `BetaLogistic` are state-bearing and live in their own
144/// `InverseLink::Sas(_)` / `InverseLink::BetaLogistic(_)` variants. The type
145/// system enforces that fact by omitting them here, so the historical
146/// "state-less placeholder" pattern (`InverseLink::Standard(LinkFunction::Sas)`)
147/// no longer compiles.
148#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
149pub enum StandardLink {
150    Logit,
151    Probit,
152    CLogLog,
153    Identity,
154    Log,
155}
156
157impl StandardLink {
158    #[inline]
159    pub const fn name(self) -> &'static str {
160        self.as_link_function().name()
161    }
162
163    #[inline]
164    pub const fn as_link_function(self) -> LinkFunction {
165        match self {
166            Self::Logit => LinkFunction::Logit,
167            Self::Probit => LinkFunction::Probit,
168            Self::CLogLog => LinkFunction::CLogLog,
169            Self::Identity => LinkFunction::Identity,
170            Self::Log => LinkFunction::Log,
171        }
172    }
173}
174
175impl From<StandardLink> for LinkFunction {
176    #[inline]
177    fn from(link: StandardLink) -> Self {
178        link.as_link_function()
179    }
180}
181
182/// Error returned when narrowing a wide [`LinkFunction`] into a [`StandardLink`].
183/// `Sas` and `BetaLogistic` are state-bearing and have no legal `Standard(_)`
184/// representation; they must be routed through `InverseLink::Sas` /
185/// `InverseLink::BetaLogistic`.
186#[derive(Debug, Clone, Copy, PartialEq, Eq)]
187pub struct StateBearingLinkInStandardSlot(pub LinkFunction);
188
189impl std::fmt::Display for StateBearingLinkInStandardSlot {
190    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
191        write!(
192            f,
193            "state-bearing link `{}` cannot be carried by `InverseLink::Standard`; \
194             route through `InverseLink::Sas` / `InverseLink::BetaLogistic`",
195            self.0.name()
196        )
197    }
198}
199
200impl std::error::Error for StateBearingLinkInStandardSlot {}
201
202impl TryFrom<LinkFunction> for StandardLink {
203    type Error = StateBearingLinkInStandardSlot;
204
205    #[inline]
206    fn try_from(link: LinkFunction) -> Result<Self, Self::Error> {
207        match link {
208            LinkFunction::Logit => Ok(Self::Logit),
209            LinkFunction::Probit => Ok(Self::Probit),
210            LinkFunction::CLogLog => Ok(Self::CLogLog),
211            LinkFunction::Identity => Ok(Self::Identity),
212            LinkFunction::Log => Ok(Self::Log),
213            LinkFunction::Sas | LinkFunction::BetaLogistic => {
214                Err(StateBearingLinkInStandardSlot(link))
215            }
216        }
217    }
218}
219
220/// Supported inverse-link components for convex blended inverse links.
221#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
222pub enum LinkComponent {
223    Probit,
224    Logit,
225    CLogLog,
226    LogLog,
227    Cauchit,
228}
229
230impl LinkComponent {
231    #[inline]
232    pub const fn name(self) -> &'static str {
233        match self {
234            Self::Probit => "probit",
235            Self::Logit => "logit",
236            Self::CLogLog => "cloglog",
237            Self::LogLog => "loglog",
238            Self::Cauchit => "cauchit",
239        }
240    }
241}
242
243/// User-facing configuration for a blended inverse link.
244#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
245pub struct MixtureLinkSpec {
246    pub components: Vec<LinkComponent>,
247    /// Free logits for components [0..K-2]. The final component logit is fixed at 0.
248    pub initial_rho: Array1<f64>,
249}
250
251/// Runtime blended-link state with precomputed softmax weights.
252#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
253pub struct MixtureLinkState {
254    pub components: Vec<LinkComponent>,
255    /// Free logits for components [0..K-2]. The final component logit is fixed at 0.
256    pub rho: Array1<f64>,
257    /// Softmax-normalized component weights (length K).
258    pub pi: Array1<f64>,
259}
260
261/// User-facing configuration for the continuous sinh-arcsinh inverse link.
262#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
263pub struct SasLinkSpec {
264    pub initial_epsilon: f64,
265    pub initial_log_delta: f64,
266}
267
268/// Runtime state shared by the two-parameter `Sas` and `BetaLogistic` links:
269/// an `epsilon` skew/asymmetry term plus a raw log-scale parameter (`log_delta`)
270/// and its derived positive companion (`delta`). The `delta` field's meaning is
271/// link-specific — see its doc — so derivative kernels must consume `log_delta`,
272/// never `delta`.
273#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
274pub struct SasLinkState {
275    pub epsilon: f64,
276    /// Raw optimization parameter. For `Sas` this is the pre-bound log-tail; for
277    /// `BetaLogistic` it is the unconstrained log geometric-mean beta shape (the
278    /// `log_shape_center` the beta-logistic kernels expect).
279    pub log_delta: f64,
280    /// Derived positive companion of `log_delta`. Its meaning depends on the link:
281    /// - `Sas`: effective tail parameter `delta = exp(B * tanh(log_delta / B))`,
282    ///   `B = SAS_LOG_DELTA_BOUND`.
283    /// - `BetaLogistic`: geometric-mean beta shape `exp(log_delta) = sqrt(a*b)`.
284    /// The beta-logistic derivative kernels take `log_delta` (the log center), so
285    /// passing this exponentiated `delta` to them would be off by an `exp`.
286    pub delta: f64,
287}
288
289/// Fixed latent Gaussian scale for the exact marginal cloglog family.
290#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
291pub struct LatentCLogLogState {
292    pub latent_sd: f64,
293}
294
295impl LatentCLogLogState {
296    #[inline]
297    pub fn new(latent_sd: f64) -> Result<Self, String> {
298        if !latent_sd.is_finite() || latent_sd < 0.0 {
299            return Err(format!(
300                "latent cloglog standard deviation must be finite and >= 0, got {latent_sd}"
301            ));
302        }
303        Ok(Self { latent_sd })
304    }
305}
306
307/// Whether the inverse link exposes the Fisher-weight jet that the Firth
308/// penalty's higher-order correction consumes. Inlined here (issue #1521) so
309/// `LikelihoodSpec::supports_firth` has no upward dependency on
310/// `solver::mixture_link` — the match is over link variants all defined in this
311/// module, so the predicate is self-contained. The canonical jet evaluation
312/// still lives in `solver::mixture_link`; this is purely the classifier.
313#[inline]
314fn inverse_link_has_fisher_weight_jet(link: &InverseLink) -> bool {
315    matches!(
316        link,
317        InverseLink::Standard(StandardLink::Logit | StandardLink::Probit | StandardLink::CLogLog,)
318            | InverseLink::LatentCLogLog(_)
319            | InverseLink::Sas(_)
320            | InverseLink::BetaLogistic(_)
321            | InverseLink::Mixture(_)
322    )
323}
324
325/// Parameterized inverse-link selector used where mu/derivatives are evaluated.
326#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
327pub enum InverseLink {
328    Standard(StandardLink),
329    LatentCLogLog(LatentCLogLogState),
330    Sas(SasLinkState),
331    BetaLogistic(SasLinkState),
332    Mixture(MixtureLinkState),
333}
334
335impl InverseLink {
336    #[inline]
337    pub const fn link_function(&self) -> LinkFunction {
338        match self {
339            Self::Standard(link) => link.as_link_function(),
340            Self::LatentCLogLog(_) => LinkFunction::CLogLog,
341            Self::Sas(_) => LinkFunction::Sas,
342            Self::BetaLogistic(_) => LinkFunction::BetaLogistic,
343            Self::Mixture(_) => LinkFunction::Logit,
344        }
345    }
346
347    #[inline]
348    pub const fn mixture_state(&self) -> Option<&MixtureLinkState> {
349        match self {
350            Self::Mixture(state) => Some(state),
351            _ => None,
352        }
353    }
354
355    #[inline]
356    pub const fn sas_state(&self) -> Option<&SasLinkState> {
357        match self {
358            Self::Sas(state) | Self::BetaLogistic(state) => Some(state),
359            _ => None,
360        }
361    }
362
363    #[inline]
364    pub const fn latent_cloglog_state(&self) -> Option<&LatentCLogLogState> {
365        match self {
366            Self::LatentCLogLog(state) => Some(state),
367            _ => None,
368        }
369    }
370}
371
372/// Fixed prior family for smoothing parameters in joint HMC refinement.
373#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
374pub enum RhoPrior {
375    Flat,
376    Normal {
377        mean: f64,
378        sd: f64,
379    },
380    /// Gamma(shape, rate) conjugate hyperprior on the precision lambda = exp(rho).
381    ///
382    /// The deterministic REML/LAML objective uses the MAP-in-lambda convention
383    /// and is minimized, so this contributes `rate * exp(rho) - (shape - 1) * rho`
384    /// up to an additive constant. Samplers over rho include the +rho Jacobian
385    /// from lambda = exp(rho), so their log-density contribution is
386    /// `shape * rho - rate * exp(rho)`. For a block with effective dimension n_p
387    /// and centered quadratic
388    /// `(beta - mu)'S_p(beta - mu)`, the conditional posterior is
389    /// `Gamma(shape + n_p/2, rate + quadratic/2)` and the closed-form MAP
390    /// precision is `(shape + n_p/2 - 1) / (rate + quadratic/2)`.
391    /// `Gamma(1, 0)` is the explicit flat/default case and reproduces the
392    /// current MacKay/Tipping fixed point.
393    GammaPrecision {
394        shape: f64,
395        rate: f64,
396    },
397    /// Penalized-complexity (PC) prior on the smoothing parameter
398    /// (Simpson, Rue, Riebler, Martins, Sørbye, *Statistical Science* 2017).
399    ///
400    /// A PC prior fixes a *base* model (here the infinitely-smooth limit, where
401    /// the penalized component collapses to its null space) and puts an
402    /// exponential prior on the distance away from it. For a Gaussian smooth
403    /// with precision `λ = exp(ρ)` the relevant distance is the marginal
404    /// standard-deviation scale `d = λ^{-1/2} = exp(-ρ/2)`, and a constant-rate
405    /// penalization `p(d) = θ exp(-θ d)` induces the closed-form log-prior
406    ///
407    /// ```text
408    /// log p(ρ) = ln(θ/2) − ρ/2 − θ exp(−ρ/2).
409    /// ```
410    ///
411    /// The rate `θ` is calibrated by the single interpretable tail statement
412    /// `P(d > upper) = tail_prob`, i.e. `θ = −ln(tail_prob) / upper`. The prior
413    /// is reparameterization-invariant and shrinks toward the simpler model
414    /// (an exponential wall against under-smoothing, only a gentle linear pull
415    /// toward over-smoothing), which is exactly the Occam behaviour wanted for
416    /// high-variance flexible components. The REML/LAML objective is minimized,
417    /// so this contributes `ρ/2 + θ exp(−ρ/2)` (up to an additive constant) to
418    /// the cost, with gradient `1/2 − (θ/2) exp(−ρ/2)` and (always positive)
419    /// curvature `(θ/4) exp(−ρ/2)`.
420    PenalizedComplexity {
421        /// Upper bound `U` on the distance scale `d = exp(-ρ/2)` (the marginal
422        /// SD scale of the penalized component) in the tail statement
423        /// `P(d > U) = tail_prob`. Must be finite and strictly positive.
424        upper: f64,
425        /// Tail probability `α` in `P(d > U) = α`. Must satisfy `0 < α < 1`.
426        tail_prob: f64,
427    },
428    /// Coordinate-specific priors for models whose smoothing parameters do
429    /// not share one prior family, such as nested coefficient groups.
430    Independent(Vec<RhoPrior>),
431}
432
433impl Default for RhoPrior {
434    fn default() -> Self {
435        Self::Normal { mean: 0.0, sd: 3.0 }
436    }
437}
438
439// ---------------------------------------------------------------------------
440// Unified likelihood specification
441// ---------------------------------------------------------------------------
442//
443// `LikelihoodSpec { response: ResponseFamily, link: InverseLink }` is the
444// canonical likelihood selector. `ResponseFamily` is a pure response-
445// distribution selector that carries the per-family scalars
446// (`Tweedie { p }`, `NegativeBinomial { theta }`, `Beta { phi }`); `InverseLink`
447// is the parameterized inverse-link selector. Splitting (response, link)
448// removes the drift bug that the former flat likelihood enum allowed
449// when its variant disagreed with a separately-stored `InverseLink`.
450
451/// Pure response distribution selector — no link information.
452#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
453pub enum ResponseFamily {
454    Gaussian,
455    Binomial,
456    Poisson,
457    Tweedie {
458        p: f64,
459    },
460    NegativeBinomial {
461        theta: f64,
462        /// `true` when `theta` was supplied by the user as a held-fixed value
463        /// (`--negative-binomial-theta`, issue #983): the fit must use exactly
464        /// this overdispersion — `Var(y) = μ + μ²/θ`, IRLS weight
465        /// `W = μθ/(θ+μ)`, coefficients/covariance/SEs all reflect it — and
466        /// the inner solver must never overwrite it. `false` means `theta` is
467        /// the running seed/estimate refined from the data each inner solve
468        /// (the #802 default). Carried on the family variant — the canonical
469        /// `theta` store — so the estimated-vs-fixed contract can never desync
470        /// from the value itself; `default_scale_metadata` derives the
471        /// matching scale variant.
472        theta_fixed: bool,
473    },
474    Beta {
475        phi: f64,
476    },
477    Gamma,
478    RoystonParmar,
479}
480
481impl ResponseFamily {
482    #[inline]
483    pub const fn name(&self) -> &'static str {
484        match self {
485            Self::Gaussian => "gaussian",
486            Self::Binomial => "binomial",
487            Self::Poisson => "poisson",
488            Self::Tweedie { .. } => "tweedie",
489            Self::NegativeBinomial { .. } => "negative-binomial",
490            Self::Beta { .. } => "beta",
491            Self::Gamma => "gamma",
492            Self::RoystonParmar => "royston-parmar",
493        }
494    }
495
496    /// Closed-interval bounds for the mean (response-scale) of this family.
497    ///
498    /// Used by predict-side CI clamps that need to keep transformed bounds
499    /// within the support of the response. Beta uses strict-open `(1e-10, 1 − 1e-10)`
500    /// to avoid logit singularities; Binomial / Royston-Parmar use the closed
501    /// `[0, 1]` since they are evaluated post-transformation. Unbounded
502    /// (continuous-real or non-negative-real) families return `None` — the
503    /// caller should not clamp.
504    #[inline]
505    pub fn mean_clamp_bounds(&self) -> Option<(f64, f64)> {
506        match self {
507            Self::Binomial | Self::RoystonParmar => Some((0.0, 1.0)),
508            Self::Beta { .. } => Some((1e-10, 1.0 - 1e-10)),
509            Self::Gaussian
510            | Self::Poisson
511            | Self::Tweedie { .. }
512            | Self::NegativeBinomial { .. }
513            | Self::Gamma => None,
514        }
515    }
516
517    /// Closed numeric bounds of the **response support** — the closure of the
518    /// set of values a single observation `Y` can take — used to clamp the
519    /// *observation (prediction) interval* so a predictive band never reports
520    /// values the response can never attain.
521    ///
522    /// This is deliberately distinct from [`Self::mean_clamp_bounds`], which
523    /// governs the *mean* (confidence) interval. `mean_clamp_bounds` returns
524    /// `None` for the non-negative-real families (Poisson / Tweedie /
525    /// NegativeBinomial / Gamma) because their default mean interval is built
526    /// by transforming the η endpoints through a positive inverse link, which
527    /// cannot escape the support. The observation interval, by contrast, is the
528    /// symmetric response-scale band `μ ± z·σ_pred`; for a small fitted mean its
529    /// lower endpoint crosses below the support floor (e.g. a Poisson count band
530    /// going negative), so it must be floored at the response support here.
531    ///
532    /// The lower edge is the infimum of the support (`0` for every non-negative
533    /// family, including the open-at-zero Gamma, whose predictive lower bound is
534    /// reported at the boundary `0`). The upper edge is `+∞` where the response
535    /// is unbounded above, which leaves the upper band untouched, or `1` for the
536    /// `[0, 1]`-valued families. `None` means the response is supported on the
537    /// whole real line (Gaussian) or has its support enforced downstream
538    /// (Royston–Parmar), and the predictive band is passed through unclamped.
539    ///
540    /// The match arms mirror [`Self::response_support_contains`]: a new family
541    /// must update both together so the support a value is validated against and
542    /// the support a predictive band is clamped to stay consistent.
543    #[inline]
544    pub fn response_support_bounds(&self) -> Option<(f64, f64)> {
545        match self {
546            Self::Gamma | Self::Poisson | Self::NegativeBinomial { .. } | Self::Tweedie { .. } => {
547                Some((0.0, f64::INFINITY))
548            }
549            Self::Beta { .. } | Self::Binomial => Some((0.0, 1.0)),
550            Self::Gaussian | Self::RoystonParmar => None,
551        }
552    }
553
554    /// Per-family textual description of the response-support requirement.
555    /// `None` means the family is supported on the entire real line at the
556    /// validation layer (Gaussian) or has its support enforced by a downstream
557    /// pathway (RoystonParmar via the survival pipeline).
558    ///
559    /// `Binomial` is the scalar Bernoulli-logit family: with no per-row trial
560    /// count `mᵢ`, the log-likelihood is `ℓ(η) = y·η − log(1 + eη)`, which is
561    /// unbounded above for `y ∉ {0, 1}` (as `η → ∞`, `ℓ ~ (y − 1)·η`), and the
562    /// binomial deviance term `(1 − y)·log((1 − y)/(1 − μ))` leaves its domain
563    /// for `y > 1`. The family is therefore only well-posed for `y ∈ {0, 1}`,
564    /// which the support check enforces up front rather than deferring to the
565    /// downstream binarity heuristic.
566    #[inline]
567    pub fn response_support_requirement(&self) -> Option<&'static str> {
568        match self {
569            Self::Gamma => Some("strictly positive response values (y > 0)"),
570            Self::Poisson | Self::NegativeBinomial { .. } | Self::Tweedie { .. } => {
571                Some("non-negative response values (y ≥ 0)")
572            }
573            Self::Beta { .. } => Some("response values strictly in the open interval (0, 1)"),
574            Self::Binomial => Some("binary response values (y ∈ {0, 1})"),
575            Self::Gaussian | Self::RoystonParmar => None,
576        }
577    }
578
579    /// Predicate that returns `true` iff `yi` lies in this family's response
580    /// support. Only meaningful for families with a non-trivial domain
581    /// constraint at the validation layer; `validate_response_support` calls
582    /// this only after `response_support_requirement` returns `Some`, so the
583    /// "unconstrained" families (Gaussian / RoystonParmar) never hit this code
584    /// path.
585    ///
586    /// `Binomial` accepts only an (exactly) binary value: `y` must equal `0` or
587    /// `1` to within `BINOMIAL_BINARY_TOL` — the same `{0, 1}` test used by the
588    /// auto-inference and degeneracy paths — because the scalar Bernoulli-logit
589    /// likelihood is unbounded for any other value (see
590    /// `response_support_requirement`).
591    #[inline]
592    fn response_support_contains(&self, yi: f64) -> bool {
593        match self {
594            Self::Gamma => yi.is_finite() && yi > 0.0,
595            Self::Poisson | Self::NegativeBinomial { .. } | Self::Tweedie { .. } => {
596                yi.is_finite() && yi >= 0.0
597            }
598            Self::Beta { .. } => yi.is_finite() && yi > 0.0 && yi < 1.0,
599            Self::Binomial => {
600                yi.is_finite()
601                    && ((yi - 0.0).abs() < BINOMIAL_BINARY_TOL
602                        || (yi - 1.0).abs() < BINOMIAL_BINARY_TOL)
603            }
604            Self::Gaussian | Self::RoystonParmar => true,
605        }
606    }
607
608    /// Human-readable family label used in domain-violation error messages
609    /// (capitalised to match user-facing prose, distinct from `name()` which
610    /// returns the lowercase canonical identifier).
611    #[inline]
612    fn response_support_label(&self) -> &'static str {
613        match self {
614            Self::Gaussian => "Gaussian",
615            Self::Binomial => "Binomial",
616            Self::Poisson => "Poisson",
617            Self::Tweedie { .. } => "Tweedie",
618            Self::NegativeBinomial { .. } => "Negative-Binomial",
619            Self::Beta { .. } => "Beta",
620            Self::Gamma => "Gamma",
621            Self::RoystonParmar => "Royston-Parmar",
622        }
623    }
624
625    /// Validate that every element of `y` lies in this family's response
626    /// support. The check is the upfront, fit-blocking enforcement of the
627    /// family's distributional support — e.g. Gamma rejects `y ≤ 0` because
628    /// the log-likelihood contains `log(y)`, Poisson rejects `y < 0` because
629    /// the log-mass contains `log(y!)`.
630    ///
631    /// Returns `Ok(())` for families whose support is the entire real line at
632    /// this layer (Gaussian) or whose support is enforced by a downstream
633    /// pathway (RoystonParmar via the survival pipeline). `Binomial` is
634    /// enforced here: only `y ∈ {0, 1}` keeps the scalar Bernoulli-logit
635    /// likelihood bounded.
636    ///
637    /// Up to `ResponseSupportViolation::MAX_REPORTED` offending row indices
638    /// are returned in the violation so the message stays bounded on large
639    /// datasets while still identifying offending rows.
640    pub fn validate_response_support(
641        &self,
642        y: ArrayView1<'_, f64>,
643    ) -> Result<(), ResponseSupportViolation> {
644        let requirement = match self.response_support_requirement() {
645            Some(r) => r,
646            None => return Ok(()),
647        };
648        let mut offending: Vec<(usize, f64)> = Vec::new();
649        let mut total_violations: usize = 0;
650        for (i, &yi) in y.iter().enumerate() {
651            if !self.response_support_contains(yi) {
652                total_violations += 1;
653                if offending.len() < ResponseSupportViolation::MAX_REPORTED {
654                    offending.push((i, yi));
655                }
656            }
657        }
658        if total_violations == 0 {
659            Ok(())
660        } else {
661            Err(ResponseSupportViolation {
662                family_label: self.response_support_label(),
663                requirement,
664                offending,
665                total_violations,
666            })
667        }
668    }
669
670    /// Detect a *degenerate* response: one whose value distribution makes the
671    /// family's REML log-likelihood non-finite even though every individual
672    /// `y_i` lies inside the family's distributional support.
673    ///
674    /// Symmetric counterpart to [`Self::validate_response_support`]: support
675    /// rejects out-of-domain *values* (e.g. a negative Poisson count); this
676    /// rejects *distributions* that send the saturated MLE to a boundary at
677    /// which the score diverges. Each family answers the question for itself
678    /// — adding a new family does not require touching workflow.rs.
679    ///
680    /// Concretely:
681    /// * `Binomial` — refuses an all-zero or all-one response: the saturated
682    ///   logit is ±∞ and the REML score is +∞ (issue #331).
683    /// * Every other family currently returns `Ok(())` at this layer — the
684    ///   support check already guarantees enough variation to make the
685    ///   log-likelihood finite.
686    pub fn validate_response_degeneracy(
687        &self,
688        y: ArrayView1<'_, f64>,
689    ) -> Result<(), ResponseDegeneracy> {
690        match self {
691            Self::Binomial => {
692                let mut saw_zero = false;
693                let mut saw_one = false;
694                for &yi in y.iter() {
695                    if (yi - 0.0).abs() < BINOMIAL_BINARY_TOL {
696                        saw_zero = true;
697                    } else if (yi - 1.0).abs() < BINOMIAL_BINARY_TOL {
698                        saw_one = true;
699                    }
700                    if saw_zero && saw_one {
701                        return Ok(());
702                    }
703                }
704                let kind = if saw_one {
705                    ResponseDegeneracyKind::BinomialAllOnes
706                } else if saw_zero {
707                    ResponseDegeneracyKind::BinomialAllZeros
708                } else {
709                    // Reachable only for an empty response: the support check
710                    // (`validate_response_support`) has already rejected any
711                    // non-binary value, so every present `yᵢ` is exactly 0 or 1
712                    // and at least one of `saw_zero`/`saw_one` is set whenever
713                    // `y` is non-empty. An empty response carries no
714                    // saturated-boundary degeneracy, so accept it here.
715                    return Ok(());
716                };
717                Err(ResponseDegeneracy {
718                    family_label: self.response_support_label(),
719                    kind,
720                })
721            }
722            Self::Gaussian => {
723                // A Gaussian fit's marginal REML log-likelihood carries a
724                // `−n/2·log σ²` term; for an effectively-constant response the
725                // ML scale `σ → 0` drives it to `+∞`, so the outer objective
726                // rejects every seed with "reml_score must be finite, got inf"
727                // (#332). Reject pre-fit when the two-pass, mean-centred sample
728                // sd is at or below `GAUSSIAN_MIN_SAMPLE_SD`. Fewer than two
729                // observations carries no estimable scale degeneracy (the
730                // sample-size gate handles too-small data), and any non-finite
731                // value is left to the dedicated finiteness checks rather than
732                // poisoning the sd, so it is skipped here.
733                let mut count = 0usize;
734                let mut mean = 0.0f64;
735                for &yi in y.iter() {
736                    if !yi.is_finite() {
737                        return Ok(());
738                    }
739                    count += 1;
740                    mean += yi;
741                }
742                if count < 2 {
743                    return Ok(());
744                }
745                mean /= count as f64;
746                let mut sumsq = 0.0f64;
747                for &yi in y.iter() {
748                    let d = yi - mean;
749                    sumsq += d * d;
750                }
751                let sample_sd = (sumsq / (count as f64 - 1.0)).sqrt();
752                if sample_sd <= GAUSSIAN_MIN_SAMPLE_SD {
753                    return Err(ResponseDegeneracy {
754                        family_label: self.response_support_label(),
755                        kind: ResponseDegeneracyKind::GaussianNearConstant {
756                            sample_sd,
757                            min_sd: GAUSSIAN_MIN_SAMPLE_SD,
758                        },
759                    });
760                }
761                Ok(())
762            }
763            Self::Poisson
764            | Self::Tweedie { .. }
765            | Self::NegativeBinomial { .. }
766            | Self::Beta { .. }
767            | Self::Gamma
768            | Self::RoystonParmar => Ok(()),
769        }
770    }
771
772    /// Auto-infer a likelihood family when the user did not specify one.
773    ///
774    /// Policy:
775    ///   * A string-valued (`Categorical`) response column is refused —
776    ///     numeric-encoded level indices (e.g. `"yes"`/`"no"` → `0.0`/`1.0`)
777    ///     would otherwise be silently interpreted as a binary outcome,
778    ///     producing a probability model the user never asked for.
779    ///   * A strictly-binary numeric response (`Binary` kind, or `Numeric`
780    ///     with only `{0, 1}` values) maps to `Binomial`.
781    ///   * A non-negative integer-valued count response (every value finite,
782    ///     `>= 0`, and within [`COUNT_INTEGER_TOL`] of an integer) that reaches
783    ///     beyond the binary `{0, 1}` window (i.e. carries at least one value
784    ///     `>= 2`) maps to `Poisson` (log link). This is the "magic-by-default"
785    ///     count detection: mgcv/statsmodels users expect `0,1,2,3,...` to fit a
786    ///     Poisson GLM, not an identity-link Gaussian.
787    ///   * Anything else (any fractional or negative value) maps to `Gaussian`.
788    ///
789    /// The fallback to `is_binary_response` inside the `Numeric` arm is what
790    /// historically lived directly inside `resolve_family`; centralising the
791    /// policy here means every entry point (formula API, CLI, future bindings)
792    /// gets the same default-inference behaviour.
793    pub fn infer_from_response(
794        y: ArrayView1<'_, f64>,
795        y_kind: ResponseColumnKind,
796    ) -> Result<Self, ResponseInferenceRefusal> {
797        match y_kind {
798            ResponseColumnKind::Categorical { levels } => Err(ResponseInferenceRefusal {
799                reason: ResponseInferenceRefusalReason::NonNumericResponse,
800                levels,
801            }),
802            ResponseColumnKind::Binary => Ok(Self::Binomial),
803            ResponseColumnKind::Numeric => {
804                let binary = !y.is_empty()
805                    && y.iter().all(|v| {
806                        v.is_finite()
807                            && ((*v - 0.0).abs() < BINOMIAL_BINARY_TOL
808                                || (*v - 1.0).abs() < BINOMIAL_BINARY_TOL)
809                    });
810                if binary {
811                    return Ok(Self::Binomial);
812                }
813                // Count signature: every value finite, non-negative, and an
814                // integer within `COUNT_INTEGER_TOL`, with at least one value
815                // `>= 2` so it is not the (already-handled) binary case and not
816                // a degenerate all-zero column. A single fractional or negative
817                // value disqualifies the whole response, keeping continuous and
818                // signed data on the conservative Gaussian default.
819                let count = !y.is_empty()
820                    && y.iter().all(|v| {
821                        v.is_finite() && *v >= 0.0 && (*v - v.round()).abs() <= COUNT_INTEGER_TOL
822                    })
823                    && y.iter().any(|v| *v >= 2.0 - COUNT_INTEGER_TOL);
824                if count {
825                    Ok(Self::Poisson)
826                } else {
827                    Ok(Self::Gaussian)
828                }
829            }
830        }
831    }
832}
833
834/// Domain-violation detail produced by [`ResponseFamily::validate_response_support`].
835///
836/// Owns its own `Display` impl so call sites in the workflow, the CLI, and the
837/// external-design GLM path produce identical user-facing prose. The
838/// `total_violations` counter is kept distinct from `offending.len()` so the
839/// message can honestly say `(N total)` even when only the first
840/// `MAX_REPORTED` indices are surfaced.
841#[derive(Debug, Clone)]
842pub struct ResponseSupportViolation {
843    pub family_label: &'static str,
844    pub requirement: &'static str,
845    pub offending: Vec<(usize, f64)>,
846    pub total_violations: usize,
847}
848
849impl ResponseSupportViolation {
850    /// Maximum number of offending row indices reported in the error message.
851    /// Keeps the message bounded on large-scale data while still pointing
852    /// the user at concrete bad rows to inspect.
853    pub const MAX_REPORTED: usize = 5;
854
855    /// Format the violation against a specific response column name. The
856    /// column name is supplied by the caller because [`ResponseFamily`] does
857    /// not know which column the user pointed at.
858    pub fn message_for(&self, response_name: &str) -> String {
859        let shown = self
860            .offending
861            .iter()
862            .map(|(i, v)| format!("y[{i}]={v}"))
863            .collect::<Vec<_>>()
864            .join(", ");
865        let more = if self.total_violations > self.offending.len() {
866            format!(", ... ({} total)", self.total_violations)
867        } else {
868            String::new()
869        };
870        format!(
871            "{family} family requires {req}; response column '{name}' violates this constraint at row(s) [{shown}{more}]",
872            family = self.family_label,
873            req = self.requirement,
874            name = response_name,
875        )
876    }
877}
878
879impl std::fmt::Display for ResponseSupportViolation {
880    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
881        f.write_str(&self.message_for("y"))
882    }
883}
884
885impl std::error::Error for ResponseSupportViolation {}
886
887/// Absolute tolerance for the exact-`{0, 1}` test that defines the scalar
888/// Bernoulli (`Binomial`) response support.
889///
890/// The scalar `Binomial` family carries no per-row trial count, so its
891/// log-likelihood is the Bernoulli/soft-label cross-entropy
892/// `ℓ(η) = y·η − log(1 + eη)`, which is unbounded above for `y ∉ {0, 1}`.
893/// Both the auto-inference (`infer_from_response`) and degeneracy
894/// (`validate_response_degeneracy`) paths classify a value as binary by the
895/// same `1e-12` window; the support check shares this single threshold so the
896/// three layers agree on exactly which responses are admissible.
897pub const BINOMIAL_BINARY_TOL: f64 = 1.0e-12;
898
899/// Minimum admissible sample standard deviation for a `Gaussian` response.
900///
901/// A response whose two-pass, mean-centred sample sd is at or below this
902/// threshold is *effectively constant* in `f64` arithmetic: the marginal REML
903/// log-likelihood carries a `−n/2·log σ²` term that diverges to `+∞` as the
904/// fitted scale `σ → 0`, so the outer objective rejects every seed with
905/// `reml_score must be finite, got inf` (#332). The bound is chosen well below
906/// any well-conditioned scientific signal (genuine data has sd many orders of
907/// magnitude larger) yet above the f64 round-off floor, so it never trips a
908/// real fit while catching responses that carry no signal (e.g. a column read
909/// in the wrong scale, or a constant accidentally fed as the response).
910pub const GAUSSIAN_MIN_SAMPLE_SD: f64 = 1.0e-10;
911
912/// Round tolerance for recognising an integer-valued (count) response.
913///
914/// `infer_from_response` classifies a numeric response as a Poisson count when
915/// every value is finite, non-negative, and within this window of its nearest
916/// non-negative integer. The threshold is looser than [`BINOMIAL_BINARY_TOL`]
917/// because count columns frequently arrive as `f64` round-trips of integers
918/// (CSV parse, integer→double promotion) that accumulate ULP-scale error well
919/// above `1e-12`; `1e-9` admits those without ever matching genuinely
920/// continuous data, whose fractional parts are O(1).
921pub const COUNT_INTEGER_TOL: f64 = 1.0e-9;
922
923/// Classifier for a [`ResponseDegeneracy`]. Each variant carries the family-
924/// specific evidence the caller needs to format a useful message without
925/// having to re-derive the diagnostic.
926#[derive(Debug, Clone)]
927pub enum ResponseDegeneracyKind {
928    /// Bernoulli / Binomial response with every observed value equal to 0.
929    BinomialAllZeros,
930    /// Bernoulli / Binomial response with every observed value equal to 1.
931    BinomialAllOnes,
932    /// Gaussian response that is effectively constant in `f64` arithmetic
933    /// (sample standard deviation at or below [`GAUSSIAN_MIN_SAMPLE_SD`]). The
934    /// marginal REML log-likelihood `−n/2·log σ²` diverges to `+∞` as the
935    /// fitted scale `σ → 0`, so every outer evaluation rejects with a
936    /// non-finite score. Carries the observed `sample_sd` and the `min_sd`
937    /// threshold so the message can quote both verbatim (#332).
938    GaussianNearConstant {
939        /// The two-pass, mean-centred sample standard deviation of the response.
940        sample_sd: f64,
941        /// The rejection threshold ([`GAUSSIAN_MIN_SAMPLE_SD`]).
942        min_sd: f64,
943    },
944}
945
946/// Degenerate-response detail produced by
947/// [`ResponseFamily::validate_response_degeneracy`].
948///
949/// Mirrors [`ResponseSupportViolation`]: it owns its own `Display` and
950/// `message_for(column_name)` so call sites in the workflow, the CLI, and
951/// any future binding produce identical user-facing prose without coupling
952/// each one to the family-internal classifier.
953#[derive(Debug, Clone)]
954pub struct ResponseDegeneracy {
955    pub family_label: &'static str,
956    pub kind: ResponseDegeneracyKind,
957}
958
959impl ResponseDegeneracy {
960    /// Format the degeneracy against a specific response column name. The
961    /// column name is supplied by the caller because [`ResponseFamily`] does
962    /// not know which column the user pointed at.
963    pub fn message_for(&self, response_name: &str) -> String {
964        match self.kind {
965            ResponseDegeneracyKind::BinomialAllZeros => format!(
966                "{family} response '{name}' is degenerate: all values are 0 (no events). \
967                 The maximum-likelihood logit is −∞ at this boundary, so the REML score \
968                 is not finite. Fix: ensure the response contains at least one 0 and \
969                 at least one 1 (e.g. drop the offending subgroup, or refit on a pooled \
970                 sample that includes both classes).",
971                family = self.family_label,
972                name = response_name,
973            ),
974            ResponseDegeneracyKind::BinomialAllOnes => format!(
975                "{family} response '{name}' is degenerate: all values are 1 (no non-events). \
976                 The maximum-likelihood logit is +∞ at this boundary, so the REML score \
977                 is not finite. Fix: ensure the response contains at least one 0 and \
978                 at least one 1 (e.g. drop the offending subgroup, or refit on a pooled \
979                 sample that includes both classes).",
980                family = self.family_label,
981                name = response_name,
982            ),
983            ResponseDegeneracyKind::GaussianNearConstant { sample_sd, min_sd } => format!(
984                "{family} response '{name}' is effectively constant (sample sd ~ {sample_sd:.3e} \
985                 <= {min_sd:.0e}); the marginal REML log-likelihood −n/2·log σ² diverges to \
986                 +∞ as σ → 0. Fix: check the response column units (is it being read in the \
987                 right scale?), centre/rescale the response, or drop the column if it carries \
988                 no signal.",
989                family = self.family_label,
990                name = response_name,
991            ),
992        }
993    }
994}
995
996impl std::fmt::Display for ResponseDegeneracy {
997    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
998        f.write_str(&self.message_for("y"))
999    }
1000}
1001
1002impl std::error::Error for ResponseDegeneracy {}
1003
1004/// Caller-supplied description of the response column's *source* kind.
1005///
1006/// `Categorical { levels }` flags a column that arrived as non-numeric strings
1007/// (the ingest layer encoded its levels to `0.0, 1.0, ...` indices) — the
1008/// `levels` list is preserved so the auto-inference refusal can echo them
1009/// back to the user verbatim. `Binary` is the ingest-layer signal that a
1010/// numeric column already contains only `{0, 1}` (used to short-circuit the
1011/// scan inside [`ResponseFamily::infer_from_response`]). `Numeric` is the
1012/// generic continuous case.
1013#[derive(Debug, Clone)]
1014pub enum ResponseColumnKind {
1015    Numeric,
1016    Binary,
1017    Categorical { levels: Vec<String> },
1018}
1019
1020/// Reason [`ResponseFamily::infer_from_response`] refused to pick a default
1021/// family. Kept as an enum so future policy extensions (e.g. "refuse on
1022/// constant response" — currently a separate CLI-side check) can be added
1023/// without breaking the call site's match arms.
1024#[derive(Debug, Clone)]
1025pub enum ResponseInferenceRefusalReason {
1026    NonNumericResponse,
1027}
1028
1029/// Auto-inference refusal carrying the levels seen in the source column so
1030/// the workflow error can echo them in its message.
1031#[derive(Debug, Clone)]
1032pub struct ResponseInferenceRefusal {
1033    pub reason: ResponseInferenceRefusalReason,
1034    pub levels: Vec<String>,
1035}
1036
1037impl ResponseInferenceRefusal {
1038    /// Format the refusal against a specific response column name.
1039    pub fn message_for(&self, response_name: &str) -> String {
1040        match self.reason {
1041            ResponseInferenceRefusalReason::NonNumericResponse => {
1042                let n = self.levels.len().min(5);
1043                let head = self
1044                    .levels
1045                    .iter()
1046                    .take(n)
1047                    .map(|s| format!("'{s}'"))
1048                    .collect::<Vec<_>>()
1049                    .join(", ");
1050                let preview = if self.levels.len() > n {
1051                    format!("[{head}, ...]")
1052                } else {
1053                    format!("[{head}]")
1054                };
1055                format!(
1056                    "response column '{name}' contains non-numeric values {preview}. \
1057                     Did you mean to use family='binomial' for a binary outcome, \
1058                     or does '{name}' contain categorical labels that should be encoded first?",
1059                    name = response_name,
1060                    preview = preview,
1061                )
1062            }
1063        }
1064    }
1065}
1066
1067impl std::fmt::Display for ResponseInferenceRefusal {
1068    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1069        f.write_str(&self.message_for("y"))
1070    }
1071}
1072
1073impl std::error::Error for ResponseInferenceRefusal {}
1074
1075/// Unified likelihood specification: response distribution + parameterized link.
1076///
1077/// `ResponseFamily` carries the per-family scalars (Tweedie p, NegBin theta,
1078/// Beta phi); `InverseLink` carries the parameterized link state. Together
1079/// they replace the former flat likelihood enum.
1080///
1081/// Only the legal `(response, link)` cells enumerated by [`LikelihoodSpec::kind`]
1082/// are representable through the public surface: [`LikelihoodSpec::try_new`]
1083/// validates the legal matrix on construction, and deserialization routes
1084/// through [`LikelihoodSpecWire`] (`#[serde(try_from / into)]`) so saved bytes
1085/// cannot resurrect an illegal cell. The on-wire shape is byte-identical to the
1086/// historical `{ response, link }` struct, so legal saved models load unchanged.
1087#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1088#[serde(try_from = "LikelihoodSpecWire", into = "LikelihoodSpecWire")]
1089pub struct LikelihoodSpec {
1090    pub response: ResponseFamily,
1091    pub link: InverseLink,
1092}
1093
1094/// Transparent serde shadow of [`LikelihoodSpec`] with the identical wire shape
1095/// (`response`, `link`). All (de)serialization of `LikelihoodSpec` routes
1096/// through this type so the legal-matrix check in
1097/// [`TryFrom<LikelihoodSpecWire>`] runs on every load, closing the
1098/// saved-bytes hole: an illegal `(response, link)` cell deserializes into a
1099/// serde error instead of a silently-masked spec.
1100#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1101pub struct LikelihoodSpecWire {
1102    pub response: ResponseFamily,
1103    pub link: InverseLink,
1104}
1105
1106impl From<LikelihoodSpec> for LikelihoodSpecWire {
1107    #[inline]
1108    fn from(spec: LikelihoodSpec) -> Self {
1109        Self {
1110            response: spec.response,
1111            link: spec.link,
1112        }
1113    }
1114}
1115
1116impl TryFrom<LikelihoodSpecWire> for LikelihoodSpec {
1117    type Error = IllegalLikelihoodCell;
1118
1119    #[inline]
1120    fn try_from(wire: LikelihoodSpecWire) -> Result<Self, Self::Error> {
1121        Self::try_new(wire.response, wire.link)
1122    }
1123}
1124
1125/// Error returned when an illegal `(ResponseFamily, InverseLink)` cell is
1126/// presented to [`LikelihoodSpec::try_new`] or surfaced during
1127/// deserialization. Only the cells enumerated by [`LikelihoodSpec::kind`] are
1128/// legal; every other product cell would silently mask a wrong response
1129/// transformation (e.g. `Poisson + Identity` predicting `μ = η`, which can go
1130/// negative).
1131#[derive(Debug, Clone, PartialEq)]
1132pub struct IllegalLikelihoodCell {
1133    pub response: &'static str,
1134    pub link: &'static str,
1135}
1136
1137impl std::fmt::Display for IllegalLikelihoodCell {
1138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1139        write!(
1140            f,
1141            "illegal likelihood cell: response `{}` does not admit inverse link `{}`. \
1142             Each non-binomial family is pinned to one link (Gaussian/Royston-Parmar→identity, \
1143             Poisson/Gamma/Tweedie/Negative-Binomial→log, Beta→logit); the binomial family \
1144             admits logit/probit/cloglog and the latent-cloglog/SAS/beta-logistic/blended \
1145             links, but not identity/log.",
1146            self.response, self.link
1147        )
1148    }
1149}
1150
1151impl std::error::Error for IllegalLikelihoodCell {}
1152
1153/// Legal-only enumeration of the `(ResponseFamily, InverseLink)` cells the
1154/// engine recognises. `LikelihoodSpec` is the product type with ~40 nominal
1155/// cells (8 response variants × 5 inverse-link variants), but only the cells
1156/// listed here are honoured by the family math; the rest are silently masked
1157/// by fallback arms. `FamilySpecKind` is the canonical projection used by
1158/// naming, predicates, and dispatch.
1159#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1160pub enum FamilySpecKind {
1161    GaussianIdentity,
1162    PoissonLog,
1163    GammaLog,
1164    TweedieLog { p: f64 },
1165    NegativeBinomialLog { theta: f64 },
1166    BetaLogit { phi: f64 },
1167    RoystonParmar,
1168    BinomialLogit,
1169    BinomialProbit,
1170    BinomialCLogLog,
1171    BinomialLatentCLogLog(LatentCLogLogState),
1172    BinomialSas(SasLinkState),
1173    BinomialBetaLogistic(SasLinkState),
1174    BinomialMixture(MixtureLinkState),
1175}
1176
1177impl FamilySpecKind {
1178    /// Short identifier matching the legacy `LikelihoodSpec::name()` strings.
1179    #[inline]
1180    pub const fn name(&self) -> &'static str {
1181        match self {
1182            Self::GaussianIdentity => "gaussian",
1183            Self::PoissonLog => "poisson-log",
1184            Self::TweedieLog { .. } => "tweedie-log",
1185            Self::NegativeBinomialLog { .. } => "negative-binomial-log",
1186            Self::BetaLogit { .. } => "beta-regression-logit",
1187            Self::GammaLog => "gamma-log",
1188            Self::RoystonParmar => "royston-parmar",
1189            Self::BinomialLogit => "binomial-logit",
1190            Self::BinomialProbit => "binomial-probit",
1191            Self::BinomialCLogLog => "binomial-cloglog",
1192            Self::BinomialLatentCLogLog(_) => "latent-cloglog-binomial",
1193            Self::BinomialSas(_) => "binomial-sas",
1194            Self::BinomialBetaLogistic(_) => "binomial-beta-logistic",
1195            Self::BinomialMixture(_) => "binomial-blended-inverse-link",
1196        }
1197    }
1198
1199    /// Human-readable label matching the legacy `LikelihoodSpec::pretty_name()` strings.
1200    #[inline]
1201    pub const fn pretty_name(&self) -> &'static str {
1202        match self {
1203            Self::GaussianIdentity => "Gaussian Identity",
1204            Self::PoissonLog => "Poisson Log",
1205            Self::TweedieLog { .. } => "Tweedie Log",
1206            Self::NegativeBinomialLog { .. } => "Negative-Binomial Log",
1207            Self::BetaLogit { .. } => "Beta Regression Logit",
1208            Self::GammaLog => "Gamma Log",
1209            Self::RoystonParmar => "Royston Parmar",
1210            Self::BinomialLogit => "Binomial Logit",
1211            Self::BinomialProbit => "Binomial Probit",
1212            Self::BinomialCLogLog => "Binomial CLogLog",
1213            Self::BinomialLatentCLogLog(_) => "Latent CLogLog Binomial",
1214            Self::BinomialSas(_) => "Binomial SAS",
1215            Self::BinomialBetaLogistic(_) => "Binomial Beta-Logistic",
1216            Self::BinomialMixture(_) => "Binomial Blended Inverse-Link",
1217        }
1218    }
1219
1220    #[inline]
1221    pub const fn is_binomial(&self) -> bool {
1222        matches!(
1223            self,
1224            Self::BinomialLogit
1225                | Self::BinomialProbit
1226                | Self::BinomialCLogLog
1227                | Self::BinomialLatentCLogLog(_)
1228                | Self::BinomialSas(_)
1229                | Self::BinomialBetaLogistic(_)
1230                | Self::BinomialMixture(_)
1231        )
1232    }
1233
1234    #[inline]
1235    pub const fn is_gaussian_identity(&self) -> bool {
1236        matches!(self, Self::GaussianIdentity)
1237    }
1238
1239    #[inline]
1240    pub const fn is_royston_parmar(&self) -> bool {
1241        matches!(self, Self::RoystonParmar)
1242    }
1243
1244    #[inline]
1245    pub const fn is_latent_cloglog(&self) -> bool {
1246        matches!(self, Self::BinomialLatentCLogLog(_))
1247    }
1248
1249    #[inline]
1250    pub const fn is_binomial_mixture(&self) -> bool {
1251        matches!(self, Self::BinomialMixture(_))
1252    }
1253
1254    #[inline]
1255    pub const fn is_binomial_sas(&self) -> bool {
1256        matches!(self, Self::BinomialSas(_))
1257    }
1258
1259    #[inline]
1260    pub const fn is_binomial_beta_logistic(&self) -> bool {
1261        matches!(self, Self::BinomialBetaLogistic(_))
1262    }
1263
1264    /// Coarse kind-level Firth eligibility: every binomial inverse link this
1265    /// enum can represent (Logit/Probit/CLogLog and the stateful
1266    /// LatentCLogLog/SAS/Beta-Logistic/Mixture links) carries a Fisher-weight
1267    /// jet, so kind-level Firth support is exactly binomial membership.
1268    ///
1269    /// The authoritative, link-resolved gate is
1270    /// [`LikelihoodSpec::supports_firth`], which routes through
1271    /// `inverse_link_has_fisher_weight_jet`. Keep this in agreement with that
1272    /// predicate: a future binomial link without a Fisher-weight jet would make
1273    /// this approximation diverge and must be handled at both sites.
1274    #[inline]
1275    pub const fn supports_firth(&self) -> bool {
1276        self.is_binomial()
1277    }
1278}
1279
1280impl LikelihoodSpec {
1281    /// Unchecked constructor: assembles a `(response, link)` cell *without*
1282    /// validating the legal matrix. Reserved for the in-crate named const
1283    /// constructors below (`gaussian_identity`, `poisson_log`, `beta_logit`,
1284    /// the `binomial_*` family, …), every one of which builds a cell that is
1285    /// legal by construction. The public, fallible entry point for an arbitrary
1286    /// `(response, link)` pair is [`LikelihoodSpec::try_new`]; the serde path
1287    /// also validates via [`LikelihoodSpecWire`]. Do not expose illegal cells
1288    /// through this method.
1289    #[inline]
1290    pub const fn new(response: ResponseFamily, link: InverseLink) -> Self {
1291        Self { response, link }
1292    }
1293
1294    /// Returns `true` when the `(response, link)` pair is one of the legal cells
1295    /// the family math honours — exactly the cells enumerated by
1296    /// [`LikelihoodSpec::kind`] before any masking. Each non-binomial response
1297    /// is pinned to a single inverse link; the binomial family admits its full
1298    /// set of probability links but never the identity/log standard links.
1299    #[inline]
1300    pub fn is_legal_cell(response: &ResponseFamily, link: &InverseLink) -> bool {
1301        match response {
1302            // Pure-identity families.
1303            ResponseFamily::Gaussian | ResponseFamily::RoystonParmar => {
1304                matches!(link, InverseLink::Standard(StandardLink::Identity))
1305            }
1306            // Log-link families.
1307            ResponseFamily::Poisson
1308            | ResponseFamily::Gamma
1309            | ResponseFamily::Tweedie { .. }
1310            | ResponseFamily::NegativeBinomial { .. } => {
1311                matches!(link, InverseLink::Standard(StandardLink::Log))
1312            }
1313            // Logit-link family.
1314            ResponseFamily::Beta { .. } => {
1315                matches!(link, InverseLink::Standard(StandardLink::Logit))
1316            }
1317            // Binomial admits every probability link except the inert
1318            // identity/log standard links.
1319            ResponseFamily::Binomial => match link {
1320                InverseLink::Standard(
1321                    StandardLink::Logit | StandardLink::Probit | StandardLink::CLogLog,
1322                ) => true,
1323                InverseLink::Standard(StandardLink::Identity | StandardLink::Log) => false,
1324                InverseLink::LatentCLogLog(_)
1325                | InverseLink::Sas(_)
1326                | InverseLink::BetaLogistic(_)
1327                | InverseLink::Mixture(_) => true,
1328            },
1329        }
1330    }
1331
1332    /// Fallible constructor over an arbitrary `(response, link)` pair. Validates
1333    /// the legal matrix ([`LikelihoodSpec::is_legal_cell`]) so that an illegal
1334    /// cell — one whose stored link would drive a wrong response transformation
1335    /// — is rejected instead of silently masked by [`LikelihoodSpec::kind`].
1336    #[inline]
1337    pub fn try_new(
1338        response: ResponseFamily,
1339        link: InverseLink,
1340    ) -> Result<Self, IllegalLikelihoodCell> {
1341        if Self::is_legal_cell(&response, &link) {
1342            Ok(Self::new(response, link))
1343        } else {
1344            Err(IllegalLikelihoodCell {
1345                response: response.name(),
1346                link: link.link_function().name(),
1347            })
1348        }
1349    }
1350
1351    #[inline]
1352    pub const fn gaussian_identity() -> Self {
1353        Self::new(
1354            ResponseFamily::Gaussian,
1355            InverseLink::Standard(StandardLink::Identity),
1356        )
1357    }
1358
1359    #[inline]
1360    pub const fn binomial_logit() -> Self {
1361        Self::new(
1362            ResponseFamily::Binomial,
1363            InverseLink::Standard(StandardLink::Logit),
1364        )
1365    }
1366
1367    #[inline]
1368    pub const fn binomial_probit() -> Self {
1369        Self::new(
1370            ResponseFamily::Binomial,
1371            InverseLink::Standard(StandardLink::Probit),
1372        )
1373    }
1374
1375    #[inline]
1376    pub const fn binomial_cloglog() -> Self {
1377        Self::new(
1378            ResponseFamily::Binomial,
1379            InverseLink::Standard(StandardLink::CLogLog),
1380        )
1381    }
1382
1383    #[inline]
1384    pub const fn binomial_latent_cloglog(state: LatentCLogLogState) -> Self {
1385        Self::new(ResponseFamily::Binomial, InverseLink::LatentCLogLog(state))
1386    }
1387
1388    #[inline]
1389    pub const fn binomial_sas(state: SasLinkState) -> Self {
1390        Self::new(ResponseFamily::Binomial, InverseLink::Sas(state))
1391    }
1392
1393    #[inline]
1394    pub const fn binomial_beta_logistic(state: SasLinkState) -> Self {
1395        Self::new(ResponseFamily::Binomial, InverseLink::BetaLogistic(state))
1396    }
1397
1398    #[inline]
1399    pub fn binomial_mixture(state: MixtureLinkState) -> Self {
1400        Self::new(ResponseFamily::Binomial, InverseLink::Mixture(state))
1401    }
1402
1403    #[inline]
1404    pub const fn poisson_log() -> Self {
1405        Self::new(
1406            ResponseFamily::Poisson,
1407            InverseLink::Standard(StandardLink::Log),
1408        )
1409    }
1410
1411    #[inline]
1412    pub const fn tweedie_log(p: f64) -> Self {
1413        Self::new(
1414            ResponseFamily::Tweedie { p },
1415            InverseLink::Standard(StandardLink::Log),
1416        )
1417    }
1418
1419    /// Estimated-theta NB spec: `theta` is the seed, refined by the inner
1420    /// solver (#802 default).
1421    #[inline]
1422    pub const fn negative_binomial_log(theta: f64) -> Self {
1423        Self::new(
1424            ResponseFamily::NegativeBinomial {
1425                theta,
1426                theta_fixed: false,
1427            },
1428            InverseLink::Standard(StandardLink::Log),
1429        )
1430    }
1431
1432    /// Fixed-theta NB spec: the fit holds `theta` at exactly this value
1433    /// (`--negative-binomial-theta`, issue #983).
1434    #[inline]
1435    pub const fn negative_binomial_log_fixed(theta: f64) -> Self {
1436        Self::new(
1437            ResponseFamily::NegativeBinomial {
1438                theta,
1439                theta_fixed: true,
1440            },
1441            InverseLink::Standard(StandardLink::Log),
1442        )
1443    }
1444
1445    #[inline]
1446    pub const fn beta_logit(phi: f64) -> Self {
1447        Self::new(
1448            ResponseFamily::Beta { phi },
1449            InverseLink::Standard(StandardLink::Logit),
1450        )
1451    }
1452
1453    #[inline]
1454    pub const fn gamma_log() -> Self {
1455        Self::new(
1456            ResponseFamily::Gamma,
1457            InverseLink::Standard(StandardLink::Log),
1458        )
1459    }
1460
1461    #[inline]
1462    pub const fn royston_parmar() -> Self {
1463        Self::new(
1464            ResponseFamily::RoystonParmar,
1465            InverseLink::Standard(StandardLink::Identity),
1466        )
1467    }
1468
1469    #[inline]
1470    pub const fn link_function(&self) -> LinkFunction {
1471        self.link.link_function()
1472    }
1473
1474    /// Once-and-for-all classification into the legal-only `FamilySpecKind`.
1475    ///
1476    /// `(ResponseFamily, InverseLink)` is a 40-cell product (8 response × 5
1477    /// inverse-link); only the cells listed here are legal. Construction
1478    /// ([`LikelihoodSpec::try_new`]) and deserialization (the
1479    /// [`LikelihoodSpecWire`] `try_from`) both enforce
1480    /// [`LikelihoodSpec::is_legal_cell`], so an illegal cell can never reach
1481    /// this method. Each link-pinned family therefore matches its *one* legal
1482    /// link explicitly; the remaining (now-unreachable) illegal combinations
1483    /// are `unreachable!()` so the historical silent masking — collapsing e.g.
1484    /// `Poisson + Identity` to `PoissonLog` while the transform predicted
1485    /// `μ = η` — can never silently happen again.
1486    pub fn kind(&self) -> FamilySpecKind {
1487        // `legal_cell_kind` returns `Some` for every legal cell and `None`
1488        // for the (by-construction-unreachable) illegal ones. Construction
1489        // (`try_new`) and deserialization (`LikelihoodSpecWire` try_from)
1490        // both enforce `is_legal_cell`, so the `None` branch can never fire
1491        // on a value that exists — `.expect` is the idiomatic loud-on-
1492        // impossible-state assertion (a banned `unreachable!`/`panic!` macro
1493        // would be the same panic with worse provenance). If it ever does
1494        // fire, the message names the offending cell so the silent-masking
1495        // regression this guards against (e.g. `Poisson + Identity`
1496        // collapsing to `PoissonLog`) stays impossible.
1497        self.legal_cell_kind().expect(
1498            "illegal likelihood cell reached kind(): construction (try_new) and \
1499             deserialization (LikelihoodSpecWire) guarantee legality",
1500        )
1501    }
1502
1503    fn legal_cell_kind(&self) -> Option<FamilySpecKind> {
1504        Some(match (&self.response, &self.link) {
1505            (ResponseFamily::Gaussian, InverseLink::Standard(StandardLink::Identity)) => {
1506                FamilySpecKind::GaussianIdentity
1507            }
1508            (ResponseFamily::RoystonParmar, InverseLink::Standard(StandardLink::Identity)) => {
1509                FamilySpecKind::RoystonParmar
1510            }
1511            (ResponseFamily::Poisson, InverseLink::Standard(StandardLink::Log)) => {
1512                FamilySpecKind::PoissonLog
1513            }
1514            (ResponseFamily::Gamma, InverseLink::Standard(StandardLink::Log)) => {
1515                FamilySpecKind::GammaLog
1516            }
1517            (ResponseFamily::Tweedie { p }, InverseLink::Standard(StandardLink::Log)) => {
1518                FamilySpecKind::TweedieLog { p: *p }
1519            }
1520            (
1521                ResponseFamily::NegativeBinomial { theta, .. },
1522                InverseLink::Standard(StandardLink::Log),
1523            ) => FamilySpecKind::NegativeBinomialLog { theta: *theta },
1524            (ResponseFamily::Beta { phi }, InverseLink::Standard(StandardLink::Logit)) => {
1525                FamilySpecKind::BetaLogit { phi: *phi }
1526            }
1527            (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Logit)) => {
1528                FamilySpecKind::BinomialLogit
1529            }
1530            (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Probit)) => {
1531                FamilySpecKind::BinomialProbit
1532            }
1533            (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::CLogLog)) => {
1534                FamilySpecKind::BinomialCLogLog
1535            }
1536            (ResponseFamily::Binomial, InverseLink::LatentCLogLog(state)) => {
1537                FamilySpecKind::BinomialLatentCLogLog(*state)
1538            }
1539            (ResponseFamily::Binomial, InverseLink::Sas(state)) => {
1540                FamilySpecKind::BinomialSas(*state)
1541            }
1542            (ResponseFamily::Binomial, InverseLink::BetaLogistic(state)) => {
1543                FamilySpecKind::BinomialBetaLogistic(*state)
1544            }
1545            (ResponseFamily::Binomial, InverseLink::Mixture(state)) => {
1546                FamilySpecKind::BinomialMixture(state.clone())
1547            }
1548            // Every remaining product cell is illegal. `try_new` /
1549            // `LikelihoodSpecWire::try_from` reject these, so construction and
1550            // deserialization guarantee they are unreachable here; `None`
1551            // surfaces that to `kind()`, which aborts loudly via `.expect`
1552            // rather than misclassify the family (a wrong `FamilySpecKind`
1553            // would silently corrupt every downstream likelihood/gradient
1554            // evaluation). A banned `panic!`/`unreachable!` macro would be the
1555            // same divergence with worse provenance.
1556            _ => return None,
1557        })
1558    }
1559
1560    #[inline]
1561    pub fn is_binomial(&self) -> bool {
1562        self.kind().is_binomial()
1563    }
1564
1565    #[inline]
1566    pub fn is_gaussian_identity(&self) -> bool {
1567        self.kind().is_gaussian_identity()
1568    }
1569
1570    #[inline]
1571    pub fn is_royston_parmar(&self) -> bool {
1572        self.kind().is_royston_parmar()
1573    }
1574
1575    #[inline]
1576    pub fn is_latent_cloglog(&self) -> bool {
1577        self.kind().is_latent_cloglog()
1578    }
1579
1580    #[inline]
1581    pub fn is_binomial_mixture(&self) -> bool {
1582        self.kind().is_binomial_mixture()
1583    }
1584
1585    #[inline]
1586    pub fn is_binomial_sas(&self) -> bool {
1587        self.kind().is_binomial_sas()
1588    }
1589
1590    #[inline]
1591    pub fn is_binomial_beta_logistic(&self) -> bool {
1592        self.kind().is_binomial_beta_logistic()
1593    }
1594
1595    /// Default scale metadata for this (response, link).
1596    #[inline]
1597    pub fn default_scale_metadata(&self) -> LikelihoodScaleMetadata {
1598        match &self.response {
1599            ResponseFamily::Gaussian => LikelihoodScaleMetadata::ProfiledGaussian,
1600            ResponseFamily::Gamma => LikelihoodScaleMetadata::EstimatedGammaShape { shape: 1.0 },
1601            // Binomial and Poisson have `phi ≡ 1` (variance fully pinned by the
1602            // mean), so a fixed unit dispersion is correct.
1603            ResponseFamily::Binomial | ResponseFamily::Poisson => {
1604                LikelihoodScaleMetadata::FixedDispersion { phi: 1.0 }
1605            }
1606            // Negative-Binomial's overdispersion `theta` (`Var(y)=mu+mu^2/theta`)
1607            // is a genuine free parameter estimated jointly with the mean by
1608            // default — the family-variant `theta` is only the seed, refined from
1609            // the converged-η ML score during fitting, exactly like the Gamma
1610            // shape / Beta precision / Tweedie φ. Freezing it at the seed made
1611            // every variance-derived output (coefficient/η SEs, Wald and credible
1612            // intervals, predictive intervals, `generate` draws) ignore the
1613            // data's overdispersion (issue #802). `phi` itself stays `≡ 1`.
1614            //
1615            // A user-supplied `--negative-binomial-theta` is the opposite
1616            // contract (issue #983): `theta_fixed = true` routes to the
1617            // non-estimated scale variant, so the inner solver's refresh gate
1618            // (`negbin_theta_is_estimated()`) stays closed and the fit honours
1619            // the held value everywhere it enters.
1620            ResponseFamily::NegativeBinomial { theta, theta_fixed } => {
1621                if *theta_fixed {
1622                    LikelihoodScaleMetadata::FixedNegBinTheta { theta: *theta }
1623                } else {
1624                    LikelihoodScaleMetadata::EstimatedNegBinTheta { theta: *theta }
1625                }
1626            }
1627            // Tweedie's dispersion `phi` is a genuine free parameter
1628            // (`Var(y) = phi · mu^p`) and is estimated jointly with the mean by
1629            // default, exactly like the Gamma shape and Beta precision. The seed
1630            // `phi = 1` is refined from the converged-η Pearson residuals during
1631            // fitting (issue #771). Freezing it at 1 made every variance-derived
1632            // output (SEs, intervals, generate draws) ignore the data's spread.
1633            ResponseFamily::Tweedie { .. } => {
1634                LikelihoodScaleMetadata::EstimatedTweediePhi { phi: 1.0 }
1635            }
1636            // Beta precision is estimated jointly with the mean by default
1637            // (magic-by-default, issue #567): the family-variant `phi` is the
1638            // seed, refined from the working residuals during fitting.
1639            ResponseFamily::Beta { phi } => LikelihoodScaleMetadata::EstimatedBetaPhi { phi: *phi },
1640            ResponseFamily::RoystonParmar => LikelihoodScaleMetadata::Unspecified,
1641        }
1642    }
1643
1644    /// Human-readable label, routed through `FamilySpecKind`.
1645    #[inline]
1646    pub fn pretty_name(&self) -> &'static str {
1647        self.kind().pretty_name()
1648    }
1649
1650    /// Short identifier, routed through `FamilySpecKind`.
1651    #[inline]
1652    pub fn name(&self) -> &'static str {
1653        self.kind().name()
1654    }
1655
1656    #[inline]
1657    pub fn supports_firth(&self) -> bool {
1658        matches!(self.response, ResponseFamily::Binomial)
1659            && inverse_link_has_fisher_weight_jet(&self.link)
1660    }
1661
1662    /// Family-level fixed-dispersion contract. Returns the dispersion parameter
1663    /// `phi` that the GLM log-likelihood / weight expressions treat as fixed
1664    /// for the given `ResponseFamily`, or `None` when the family carries no
1665    /// fixed scale (profiled or jointly estimated).
1666    ///
1667    /// - `Gaussian` and `Gamma` profile/estimate the scale jointly with the
1668    ///   mean, so no fixed `phi` is exposed here.
1669    /// - `Binomial` and `Poisson` are unit-scale exponential-family fits, so the
1670    ///   contract is `Some(1.0)`. NegativeBinomial's overdispersion lives in
1671    ///   `theta` (a separate parameter / flag), not in a free `phi`, so it also
1672    ///   returns `Some(1.0)`.
1673    /// - `Tweedie { p }` carries its variance power on the family variant. Its
1674    ///   free dispersion `phi` lives in `LikelihoodScaleMetadata` and is
1675    ///   estimated by default (`EstimatedTweediePhi`, issue #771), so this
1676    ///   family-level contract only exposes the unit seed used when callers ask
1677    ///   the response family without scale metadata.
1678    /// - `Beta { phi }` carries its precision parameter directly on the family
1679    ///   variant; the contract returns that exact value rather than the
1680    ///   placeholder used elsewhere for unit-scale GLMs.
1681    /// - `RoystonParmar` has no GLM-style dispersion slot.
1682    #[inline]
1683    pub const fn fixed_dispersion(&self) -> Option<f64> {
1684        match self.response {
1685            ResponseFamily::Gaussian | ResponseFamily::Gamma | ResponseFamily::RoystonParmar => {
1686                None
1687            }
1688            ResponseFamily::Binomial
1689            | ResponseFamily::Poisson
1690            | ResponseFamily::Tweedie { .. }
1691            | ResponseFamily::NegativeBinomial { .. } => Some(1.0),
1692            ResponseFamily::Beta { phi } => Some(phi),
1693        }
1694    }
1695}
1696
1697#[inline]
1698pub const fn is_valid_tweedie_power(p: f64) -> bool {
1699    p.is_finite() && p > 1.0 && p < 2.0
1700}
1701
1702/// Error returned when an `InverseLink` cannot be paired with a particular
1703/// response family because the link is structurally unsupported for that
1704/// family. Carries the link name so call sites can produce a useful message
1705/// without losing the offending variant.
1706#[derive(Debug, Clone, PartialEq, Eq)]
1707pub struct UnsupportedLinkError {
1708    pub family: &'static str,
1709    pub link_name: String,
1710}
1711
1712impl UnsupportedLinkError {
1713    /// Construct an `UnsupportedLinkError` tagged with the response-family
1714    /// name (`"binomial"`, `"gaussian"`, ...) and a printable name for the
1715    /// offending `InverseLink` variant (extracted via the module-private
1716    /// `inverse_link_diagnostic_name`). No allocation beyond the link name.
1717    #[inline]
1718    pub fn new(family: &'static str, link: &InverseLink) -> Self {
1719        Self {
1720            family,
1721            link_name: inverse_link_diagnostic_name(link),
1722        }
1723    }
1724}
1725
1726impl std::fmt::Display for UnsupportedLinkError {
1727    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1728        write!(
1729            f,
1730            "inverse link `{}` is not supported by the {} response family",
1731            self.link_name, self.family
1732        )
1733    }
1734}
1735
1736impl std::error::Error for UnsupportedLinkError {}
1737
1738#[inline]
1739pub fn inverse_link_diagnostic_name(link: &InverseLink) -> String {
1740    match link {
1741        InverseLink::Standard(lf) => lf.name().to_string(),
1742        InverseLink::LatentCLogLog(_) => "latent-cloglog".to_string(),
1743        InverseLink::Sas(_) => "sas".to_string(),
1744        InverseLink::BetaLogistic(_) => "beta-logistic".to_string(),
1745        InverseLink::Mixture(_) => "mixture".to_string(),
1746    }
1747}
1748
1749/// Resolve a binomial-flavoured `LikelihoodSpec` from an `InverseLink`.
1750///
1751/// `StandardLink::Logit | Probit | CLogLog` and the state-bearing
1752/// `LatentCLogLog / Sas / BetaLogistic / Mixture` variants are accepted as
1753/// binomial-compatible. `StandardLink::Log | Identity` have no canonical
1754/// binomial meaning and return `UnsupportedLinkError`. Since
1755/// `InverseLink::Standard` carries `StandardLink` (not `LinkFunction`), the
1756/// previously-required `Standard(LinkFunction::Sas | BetaLogistic)` arm is
1757/// structurally impossible and has been removed.
1758#[inline]
1759pub fn inverse_link_to_binomial_spec(
1760    link: &InverseLink,
1761) -> Result<LikelihoodSpec, UnsupportedLinkError> {
1762    match link {
1763        InverseLink::Standard(StandardLink::Logit)
1764        | InverseLink::Standard(StandardLink::Probit)
1765        | InverseLink::Standard(StandardLink::CLogLog) => {
1766            Ok(LikelihoodSpec::new(ResponseFamily::Binomial, link.clone()))
1767        }
1768        InverseLink::LatentCLogLog(_)
1769        | InverseLink::Sas(_)
1770        | InverseLink::BetaLogistic(_)
1771        | InverseLink::Mixture(_) => {
1772            Ok(LikelihoodSpec::new(ResponseFamily::Binomial, link.clone()))
1773        }
1774        InverseLink::Standard(StandardLink::Log)
1775        | InverseLink::Standard(StandardLink::Identity) => {
1776            Err(UnsupportedLinkError::new("binomial", link))
1777        }
1778    }
1779}
1780
1781/// How a likelihood's scale parameter is handled by the fit/result contract.
1782#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
1783pub enum LikelihoodScaleMetadata {
1784    /// Gaussian identity fits profile sigma outside the fixed-scale GLM machinery.
1785    ProfiledGaussian,
1786    /// Fixed exponential-dispersion parameter `phi`.
1787    FixedDispersion { phi: f64 },
1788    /// Fixed Gamma shape `k`, equivalent to `phi = 1 / k`.
1789    FixedGammaShape { shape: f64 },
1790    /// Gamma shape `k` estimated jointly with the mean model.
1791    EstimatedGammaShape { shape: f64 },
1792    /// Beta-regression precision `phi` estimated jointly with the mean model.
1793    /// `Var(y) = mu(1-mu)/(1+phi)`; larger `phi` means less noise. Estimated
1794    /// from the working residuals after each mean fit and refreshed across outer
1795    /// iterations, exactly like the Gamma shape (issue #567).
1796    EstimatedBetaPhi { phi: f64 },
1797    /// Tweedie exponential-dispersion `phi` estimated jointly with the mean
1798    /// model. `Var(y) = phi · mu^p` with `phi` a genuine free parameter (unlike
1799    /// Binomial/Poisson, where `phi ≡ 1`). Estimated by the Pearson moment
1800    /// estimator `phî = Σ wᵢ (yᵢ − μᵢ)² / μᵢ^p / Σ wᵢ` at the converged η and
1801    /// refreshed across outer iterations, exactly like the Gamma shape and the
1802    /// Beta precision. `phi` enters the IRLS working weight `prior·μ^{2−p}/phi`,
1803    /// so the coefficient covariance `Vb = H⁻¹` already scales as `phi` and the
1804    /// reported SEs track `√phi` (issue #771).
1805    EstimatedTweediePhi { phi: f64 },
1806    /// Negative-Binomial overdispersion `theta` estimated jointly with the mean
1807    /// model. `Var(y) = mu + mu^2 / theta`; larger `theta` means less
1808    /// overdispersion (the Poisson limit is `theta → ∞`). Estimated by the
1809    /// maximum-likelihood `theta` score
1810    /// `Σ wᵢ[ψ(yᵢ+θ) − ψ(θ) + lnθ + 1 − ln(θ+μᵢ) − (yᵢ+θ)/(μᵢ+θ)] = 0` at the
1811    /// converged η (MASS `glm.nb`'s `theta.ml`) and refreshed across outer
1812    /// iterations, exactly like the Gamma shape / Beta precision / Tweedie φ.
1813    /// Unlike those, `theta` is *not* a dispersion scale `phi`: it enters only
1814    /// the IRLS working weight `W = μθ/(θ+μ)` (the full NB2 Fisher information),
1815    /// so the stored penalized Hessian is already the true one and the
1816    /// coefficient covariance `Vb = H⁻¹` takes no post-hoc multiply — `phi ≡ 1`
1817    /// for NB, the overdispersion lives in the variance function. The `theta`
1818    /// carried here mirrors `ResponseFamily::NegativeBinomial { theta }` (the
1819    /// canonical store every weight/deviance expression reads), kept in sync by
1820    /// `with_negbin_theta`, exactly as `EstimatedBetaPhi` mirrors `Beta { phi }`
1821    /// (issue #802).
1822    EstimatedNegBinTheta { theta: f64 },
1823    /// Negative-Binomial overdispersion `theta` held fixed at a user-supplied
1824    /// value (`--negative-binomial-theta`, issue #983). Identical role to
1825    /// `EstimatedNegBinTheta` in every weight / variance / covariance
1826    /// expression (`W = μθ/(θ+μ)`, `Var(y) = μ + μ²/θ`, `phi ≡ 1`), but the
1827    /// inner solver's ML refresh is gated off: the recorded `theta` is the
1828    /// user's, by construction. The fixed/estimated split mirrors
1829    /// `FixedGammaShape` vs `EstimatedGammaShape`.
1830    FixedNegBinTheta { theta: f64 },
1831    /// The engine does not expose fixed-scale semantics for this family.
1832    Unspecified,
1833}
1834
1835impl LikelihoodScaleMetadata {
1836    #[inline]
1837    pub const fn fixed_phi(self) -> Option<f64> {
1838        match self {
1839            Self::FixedDispersion { phi }
1840            | Self::EstimatedBetaPhi { phi }
1841            | Self::EstimatedTweediePhi { phi } => Some(phi),
1842            Self::FixedGammaShape { shape } | Self::EstimatedGammaShape { shape } => {
1843                Some(1.0 / shape)
1844            }
1845            // NB's dispersion scale is `phi ≡ 1` (the overdispersion is carried
1846            // by `theta` inside the variance function, not a scale multiply), so
1847            // the fixed-`phi` contract is `Some(1.0)` — NOT `theta`.
1848            Self::EstimatedNegBinTheta { .. } | Self::FixedNegBinTheta { .. } => Some(1.0),
1849            Self::ProfiledGaussian | Self::Unspecified => None,
1850        }
1851    }
1852
1853    /// Whether the Negative-Binomial overdispersion `theta` is estimated from
1854    /// data (the default for NB families, issue #802).
1855    #[inline]
1856    pub const fn negbin_theta_is_estimated(self) -> bool {
1857        matches!(self, Self::EstimatedNegBinTheta { .. })
1858    }
1859
1860    /// The Negative-Binomial `theta` carried in the scale metadata (estimated
1861    /// or user-fixed), or `None` for non-NB families.
1862    #[inline]
1863    pub const fn negbin_theta(self) -> Option<f64> {
1864        match self {
1865            Self::EstimatedNegBinTheta { theta } | Self::FixedNegBinTheta { theta } => Some(theta),
1866            _ => None,
1867        }
1868    }
1869
1870    /// Whether the Beta-regression precision `phi` is estimated from data.
1871    #[inline]
1872    pub const fn beta_phi_is_estimated(self) -> bool {
1873        matches!(self, Self::EstimatedBetaPhi { .. })
1874    }
1875
1876    /// Whether the Tweedie exponential-dispersion `phi` is estimated from data.
1877    #[inline]
1878    pub const fn tweedie_phi_is_estimated(self) -> bool {
1879        matches!(self, Self::EstimatedTweediePhi { .. })
1880    }
1881
1882    #[inline]
1883    pub const fn gamma_shape(self) -> Option<f64> {
1884        match self {
1885            Self::FixedGammaShape { shape } | Self::EstimatedGammaShape { shape } => Some(shape),
1886            _ => None,
1887        }
1888    }
1889
1890    #[inline]
1891    pub const fn gamma_shape_is_estimated(self) -> bool {
1892        matches!(self, Self::EstimatedGammaShape { .. })
1893    }
1894}
1895
1896/// Whether a stored log-likelihood includes response-only normalization constants.
1897#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1898pub enum LogLikelihoodNormalization {
1899    Full,
1900    OmittingResponseConstants,
1901    UserProvided,
1902}
1903
1904/// Explicit GLM likelihood specification: response/link spec plus scale semantics.
1905///
1906/// `spec` is the canonical `(ResponseFamily, InverseLink)` selector. `scale`
1907/// records how the scale parameter is handled (profiled Gaussian sigma, fixed
1908/// dispersion, fixed/estimated Gamma shape). The Gamma shape is mutated in
1909/// place during PIRLS via `with_gamma_shape`; preserving that field on this
1910/// struct is what lets the inner solver thread the estimated shape into
1911/// deviance / log-likelihood / weight evaluation without a separate side
1912/// channel.
1913#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1914pub struct GlmLikelihoodSpec {
1915    pub spec: LikelihoodSpec,
1916    pub scale: LikelihoodScaleMetadata,
1917}
1918
1919impl GlmLikelihoodSpec {
1920    /// Build a `GlmLikelihoodSpec` from a `LikelihoodSpec`, deriving the
1921    /// canonical default scale metadata for the response family.
1922    #[inline]
1923    pub fn canonical(spec: LikelihoodSpec) -> Self {
1924        let scale = spec.default_scale_metadata();
1925        Self { spec, scale }
1926    }
1927
1928    #[inline]
1929    pub fn link_function(&self) -> LinkFunction {
1930        self.spec.link_function()
1931    }
1932
1933    #[inline]
1934    pub fn fixed_phi(&self) -> Option<f64> {
1935        self.scale.fixed_phi()
1936    }
1937
1938    /// Multiplier converting the stored unscaled inverse penalized Hessian
1939    /// `H⁻¹` into the reported coefficient covariance `Vb = H⁻¹ · scale`.
1940    ///
1941    /// # Invariant
1942    ///
1943    /// `Vb` is the inverse of the Hessian of the *actual penalized objective the
1944    /// inner solver minimizes*. The stored Hessian is always assembled as
1945    /// `H = XᵀWX + S_λ`, with the penalty `S_λ` added **unscaled** (see
1946    /// `pirls::penalty::add_to_hessian`). Whether `H` is already that true
1947    /// objective Hessian — and hence whether any post-hoc dispersion multiply is
1948    /// warranted — is decided entirely by what the IRLS working weight `W`
1949    /// carries:
1950    ///
1951    /// * **Working weight already carries the reciprocal dispersion / full
1952    ///   Fisher information.** Then `H = Xᵀ(W_sf/φ)X + S_λ` already equals the
1953    ///   true penalized Hessian (e.g. mgcv's `XᵀW_sfX/φ + S_λ` for Gamma), so
1954    ///   `Vb = H⁻¹` and the scale is exactly `1.0`. This is the case for Gamma
1955    ///   (`W = prior·shape = prior/φ`), Tweedie (`W = prior·μ^{2−p}/φ`), Beta
1956    ///   and Negative-Binomial (the working weight is the complete fixed-scale
1957    ///   Fisher information), and the fixed-scale exponential families
1958    ///   Poisson/Binomial (`φ ≡ 1`). Multiplying `H⁻¹` by the dispersion again
1959    ///   for any of these double-counts it and shrinks every SE by `√dispersion`.
1960    ///
1961    /// * **Working weight is scale-free** (`W = priorweights`, the profiled
1962    ///   Gaussian convention). Then the data term carries an implicit unit scale
1963    ///   and `H = XᵀPX + S_λ` is the Hessian of `½·(scaled deviance)·σ²⁻¹`
1964    ///   *without* the `σ²`. The correct covariance restores it:
1965    ///   `Vb = H⁻¹ · σ̂²`. Only this branch returns a non-unit scale.
1966    ///
1967    /// `profiled_gaussian_phi` is the profiled residual variance `σ̂²` and is
1968    /// consulted **only** for the scale-free profiled-Gaussian branch; every
1969    /// other family ignores it. This deliberately does NOT touch
1970    /// `dispersion()` / `dispersion_from_likelihood`, which still report the
1971    /// response-level observation noise (`1/shape` for Gamma, `1/(1+φ)` for
1972    /// Beta, …) used by predictive-interval construction — a distinct quantity
1973    /// from the coefficient-covariance scale defined here.
1974    #[inline]
1975    pub fn coefficient_covariance_scale(&self, profiled_gaussian_phi: f64) -> f64 {
1976        match self.scale {
1977            // Scale-free working weight: restore the profiled variance.
1978            LikelihoodScaleMetadata::ProfiledGaussian => profiled_gaussian_phi,
1979            // Working weight already carries the dispersion / full Fisher
1980            // information, so the stored H is the true penalized Hessian and no
1981            // further dispersion multiply is warranted.
1982            //
1983            // FixedDispersion covers the explicitly-scaled Gaussian submodel
1984            // (W·=1/φ above) and Negative-Binomial; the Gamma, Beta and Tweedie
1985            // variants fold their reciprocal-dispersion / precision / φ into W
1986            // (Tweedie W = prior·μ^{2−p}/φ, so the SE already scales as √φ); and
1987            // Unspecified families never expose a separate post-hoc scale.
1988            LikelihoodScaleMetadata::FixedDispersion { .. }
1989            | LikelihoodScaleMetadata::FixedGammaShape { .. }
1990            | LikelihoodScaleMetadata::EstimatedGammaShape { .. }
1991            | LikelihoodScaleMetadata::EstimatedBetaPhi { .. }
1992            | LikelihoodScaleMetadata::EstimatedTweediePhi { .. }
1993            // Negative-Binomial folds `theta` into the working weight
1994            // `W = μθ/(θ+μ)` (the full NB2 Fisher information), so the stored
1995            // `H = XᵀWX + S_λ` is already the true penalized Hessian and the
1996            // covariance scale is `1.0` (`phi ≡ 1`). The reported SEs respond to
1997            // the data's overdispersion entirely through that `theta`-dependent
1998            // weight (issue #802) — multiplying again would double-count it.
1999            // The same holds verbatim for a user-fixed `theta` (issue #983).
2000            | LikelihoodScaleMetadata::EstimatedNegBinTheta { .. }
2001            | LikelihoodScaleMetadata::FixedNegBinTheta { .. }
2002            | LikelihoodScaleMetadata::Unspecified => 1.0,
2003        }
2004    }
2005
2006    #[inline]
2007    pub fn gamma_shape(&self) -> Option<f64> {
2008        self.scale.gamma_shape()
2009    }
2010
2011    /// Mutate the Gamma shape parameter in place while preserving the rest of
2012    /// the spec. The shape only takes effect for Gamma families; for other
2013    /// families the scale metadata is left untouched.
2014    #[inline]
2015    pub fn with_gamma_shape(mut self, shape: f64) -> Self {
2016        self.scale = match self.scale {
2017            LikelihoodScaleMetadata::FixedGammaShape { .. } => {
2018                LikelihoodScaleMetadata::FixedGammaShape { shape }
2019            }
2020            LikelihoodScaleMetadata::EstimatedGammaShape { .. } => {
2021                LikelihoodScaleMetadata::EstimatedGammaShape { shape }
2022            }
2023            other => match &self.spec.response {
2024                ResponseFamily::Gamma => LikelihoodScaleMetadata::EstimatedGammaShape { shape },
2025                _ => other,
2026            },
2027        };
2028        self
2029    }
2030
2031    /// Whether the Beta-regression precision `phi` is estimated from data.
2032    #[inline]
2033    pub fn beta_phi_is_estimated(&self) -> bool {
2034        self.scale.beta_phi_is_estimated()
2035    }
2036
2037    /// Mutate the Beta precision `phi` in place, on BOTH the family variant
2038    /// (where every PIRLS weight / deviance / log-likelihood expression reads it
2039    /// via `ResponseFamily::Beta { phi }`) and the scale metadata (the
2040    /// estimated-vs-fixed contract). No-op for non-Beta families. The inner
2041    /// solver calls this once per inner solve after a moment estimate of `phi`
2042    /// from the working residuals, so the IRLS weights `Var(y)=mu(1-mu)/(1+phi)`
2043    /// reflect the true precision rather than the `phi=1` seed (issue #567).
2044    #[inline]
2045    pub fn with_beta_phi(mut self, phi: f64) -> Self {
2046        if let ResponseFamily::Beta { phi: family_phi } = &mut self.spec.response {
2047            *family_phi = phi;
2048            self.scale = LikelihoodScaleMetadata::EstimatedBetaPhi { phi };
2049        }
2050        self
2051    }
2052
2053    /// Whether the Tweedie exponential-dispersion `phi` is estimated from data.
2054    #[inline]
2055    pub fn tweedie_phi_is_estimated(&self) -> bool {
2056        self.scale.tweedie_phi_is_estimated()
2057    }
2058
2059    /// Mutate the Tweedie dispersion `phi` in place. Unlike Beta, the Tweedie
2060    /// power `p` (not `phi`) is what is carried on the `ResponseFamily::Tweedie`
2061    /// variant; the dispersion lives purely in the scale metadata and is read by
2062    /// the IRLS weight (`prior·μ^{2−p}/phi`) through `fixed_phi()`. So updating
2063    /// the metadata here is sufficient to thread the estimated `phi` into every
2064    /// weight / covariance expression. No-op for non-Tweedie families (issue
2065    /// #771).
2066    #[inline]
2067    pub fn with_tweedie_phi(mut self, phi: f64) -> Self {
2068        if matches!(self.spec.response, ResponseFamily::Tweedie { .. }) {
2069            self.scale = LikelihoodScaleMetadata::EstimatedTweediePhi { phi };
2070        }
2071        self
2072    }
2073
2074    /// Whether the Negative-Binomial overdispersion `theta` is estimated from
2075    /// data (issue #802).
2076    #[inline]
2077    pub fn negbin_theta_is_estimated(&self) -> bool {
2078        self.scale.negbin_theta_is_estimated()
2079    }
2080
2081    /// Mutate the Negative-Binomial overdispersion `theta` in place, on BOTH the
2082    /// family variant (where every PIRLS weight / deviance / log-likelihood
2083    /// expression reads it via `ResponseFamily::NegativeBinomial { theta }`) and
2084    /// the scale metadata (the estimated-vs-fixed contract). No-op for non-NB
2085    /// families. The inner solver calls this once per inner solve after a
2086    /// maximum-likelihood estimate of `theta` from the working residuals, so the
2087    /// IRLS weight `W = μθ/(θ+μ)` and the variance `Var(y)=mu+mu^2/theta` reflect
2088    /// the data's overdispersion rather than the seed `theta` (issue #802). This
2089    /// mirrors `with_beta_phi` exactly — both keep the family variant and the
2090    /// scale metadata as two synchronized views of one estimated parameter.
2091    /// No-op for a user-fixed `theta` (`theta_fixed = true` /
2092    /// `FixedNegBinTheta`, issue #983): the held value is the contract, and
2093    /// this mutator must never let an estimation path overwrite it — the
2094    /// PIRLS refresh gate (`negbin_theta_is_estimated()`) already skips the
2095    /// call, this enforces the same invariant at the data itself.
2096    #[inline]
2097    pub fn with_negbin_theta(mut self, theta: f64) -> Self {
2098        if let ResponseFamily::NegativeBinomial {
2099            theta: family_theta,
2100            theta_fixed,
2101        } = &mut self.spec.response
2102            && !*theta_fixed
2103        {
2104            *family_theta = theta;
2105            self.scale = LikelihoodScaleMetadata::EstimatedNegBinTheta { theta };
2106        }
2107        self
2108    }
2109
2110    /// The estimated Negative-Binomial `theta`, read from the family variant
2111    /// (the canonical store), or `None` for non-NB families.
2112    #[inline]
2113    pub fn negbin_theta(&self) -> Option<f64> {
2114        match self.spec.response {
2115            ResponseFamily::NegativeBinomial { theta, .. } => Some(theta),
2116            _ => None,
2117        }
2118    }
2119
2120    /// Produce a copy of this spec with the Tweedie exponential-dispersion
2121    /// `phi` PINNED at `phi` for the duration of the smoothing-parameter (λ)
2122    /// search (#1477). Converts an `EstimatedTweediePhi` scale into the
2123    /// statistically-identical `FixedDispersion` form, which gates off the
2124    /// per-inner-solve Pearson refresh in
2125    /// `GamWorkingModel::update_with_curvature` (its guard is
2126    /// `tweedie_phi_is_estimated()`, which `FixedDispersion` does not satisfy)
2127    /// while leaving every weight / variance / covariance expression unchanged
2128    /// (they read `phi` through `fixed_phi()`, which `FixedDispersion` answers
2129    /// identically).
2130    ///
2131    /// Rationale: with `phi` estimated, the inner solver re-derives it from each
2132    /// outer iterate's *warm-start* η (the Pearson moment estimator
2133    /// `phî = Σ wᵢ(yᵢ−μᵢ)²/μᵢ^p / Σ wᵢ`). The Tweedie LAML omits the
2134    /// `phi`-dependent saddlepoint normalizer `a(y,φ)` from `−ℓ(β̂)` — valid only
2135    /// when `phi` is fixed across the surface — so a drifting `phi` makes
2136    /// `F(ρ)` a non-stationary function of ρ that REWARDS dispersion inflation:
2137    /// driving a double-penalty null-space `λ` up kills a genuinely-supported
2138    /// linear trend, the residuals grow, the warm-start `phî` rises, and the
2139    /// `[yθ−κ]/φ` deviance term shrinks with no compensating normalizer penalty,
2140    /// so the criterion falls and the outer optimizer rails `λ_null` to the box
2141    /// bound (the #1477 Tweedie double-penalty boundary blow-up). Holding `phi`
2142    /// fixed across the λ-search makes `F(ρ) = REML(ρ, φ_frozen)` a genuine
2143    /// stationary function of ρ, exactly as for the Gaussian profiled scale
2144    /// (whose `(n−Mp)/2·log(2πφ̂)` normalizer is retained) and as mgcv does for
2145    /// Tweedie. `phi` is still Pearson-refreshed at the single final reported fit
2146    /// (the `refine_dispersion_at_converged_eta = true` accept-fit). No-op for
2147    /// non-Tweedie families and for a user-fixed `phi`.
2148    #[inline]
2149    pub fn with_tweedie_phi_frozen_for_search(mut self, phi: f64) -> Self {
2150        if matches!(self.spec.response, ResponseFamily::Tweedie { .. })
2151            && self.scale.tweedie_phi_is_estimated()
2152        {
2153            self.scale = LikelihoodScaleMetadata::FixedDispersion { phi };
2154        }
2155        self
2156    }
2157
2158    /// Produce a copy of this spec with the Negative-Binomial overdispersion
2159    /// `theta` PINNED at `theta` for the duration of the smoothing-parameter
2160    /// (λ) search (#1082). Converts an `EstimatedNegBinTheta` spec into the
2161    /// statistically-identical `FixedNegBinTheta` form (`theta_fixed = true`),
2162    /// which gates off the per-inner-solve ML refresh in
2163    /// `GamWorkingModel::update_with_curvature` (its guard is
2164    /// `negbin_theta_is_estimated()`).
2165    ///
2166    /// Rationale: with θ estimated, the inner solver re-derives θ from each
2167    /// outer iterate's *warm-start* η, so θ — and hence the NB working response,
2168    /// deviance and penalty-logdet that feed the REML criterion — drifts every
2169    /// outer evaluation. The outer optimizer then chases a moving target and the
2170    /// projected-gradient convergence test never trips, grinding the loop to
2171    /// `max_iter` (the #1082 negative-binomial tensor timeout). Holding θ fixed
2172    /// across the λ-search makes the REML objective `F(ρ) = REML(ρ, θ_frozen)` a
2173    /// genuine stationary function of ρ, so the loop converges in a handful of
2174    /// iterations — and θ is still ML-refreshed at the single final, reported fit
2175    /// (the `refine_dispersion_at_converged_eta = true` accept-fit), exactly as
2176    /// the function-level docs require ("estimate the scale at the converged fit,
2177    /// not inside the λ search; mgcv likewise"). No-op for non-NB families and
2178    /// for an already user-fixed θ.
2179    #[inline]
2180    pub fn with_negbin_theta_frozen_for_search(mut self, theta: f64) -> Self {
2181        if let ResponseFamily::NegativeBinomial {
2182            theta: family_theta,
2183            theta_fixed,
2184        } = &mut self.spec.response
2185        {
2186            *family_theta = theta;
2187            *theta_fixed = true;
2188            self.scale = LikelihoodScaleMetadata::FixedNegBinTheta { theta };
2189        }
2190        self
2191    }
2192}
2193
2194#[cfg(test)]
2195mod tests {
2196    use super::*;
2197    use ndarray::arr1;
2198
2199    // -----------------------------------------------------------------------
2200    // CoefficientGroupPrior::validate
2201    // -----------------------------------------------------------------------
2202
2203    #[test]
2204    fn prior_flat_always_ok() {
2205        assert!(CoefficientGroupPrior::Flat.validate("ctx").is_ok());
2206    }
2207
2208    #[test]
2209    fn prior_normal_log_precision_valid() {
2210        assert!(
2211            CoefficientGroupPrior::NormalLogPrecision { mean: 0.0, sd: 1.0 }
2212                .validate("ctx")
2213                .is_ok()
2214        );
2215    }
2216
2217    #[test]
2218    fn prior_normal_log_precision_infinite_mean_errors() {
2219        assert!(
2220            CoefficientGroupPrior::NormalLogPrecision { mean: f64::INFINITY, sd: 1.0 }
2221                .validate("ctx")
2222                .is_err()
2223        );
2224    }
2225
2226    #[test]
2227    fn prior_normal_log_precision_zero_sd_errors() {
2228        assert!(
2229            CoefficientGroupPrior::NormalLogPrecision { mean: 0.0, sd: 0.0 }
2230                .validate("ctx")
2231                .is_err()
2232        );
2233    }
2234
2235    #[test]
2236    fn prior_normal_log_precision_negative_sd_errors() {
2237        assert!(
2238            CoefficientGroupPrior::NormalLogPrecision { mean: 0.0, sd: -1.0 }
2239                .validate("ctx")
2240                .is_err()
2241        );
2242    }
2243
2244    #[test]
2245    fn prior_gamma_precision_valid() {
2246        assert!(
2247            CoefficientGroupPrior::GammaPrecision { shape: 1.0, rate: 0.0 }
2248                .validate("ctx")
2249                .is_ok()
2250        );
2251    }
2252
2253    #[test]
2254    fn prior_gamma_precision_zero_shape_errors() {
2255        assert!(
2256            CoefficientGroupPrior::GammaPrecision { shape: 0.0, rate: 1.0 }
2257                .validate("ctx")
2258                .is_err()
2259        );
2260    }
2261
2262    #[test]
2263    fn prior_gamma_precision_negative_rate_errors() {
2264        assert!(
2265            CoefficientGroupPrior::GammaPrecision { shape: 1.0, rate: -0.1 }
2266                .validate("ctx")
2267                .is_err()
2268        );
2269    }
2270
2271    #[test]
2272    fn prior_penalized_complexity_valid() {
2273        assert!(
2274            CoefficientGroupPrior::PenalizedComplexity { upper: 1.0, tail_prob: 0.05 }
2275                .validate("ctx")
2276                .is_ok()
2277        );
2278    }
2279
2280    #[test]
2281    fn prior_penalized_complexity_zero_upper_errors() {
2282        assert!(
2283            CoefficientGroupPrior::PenalizedComplexity { upper: 0.0, tail_prob: 0.05 }
2284                .validate("ctx")
2285                .is_err()
2286        );
2287    }
2288
2289    #[test]
2290    fn prior_penalized_complexity_tail_prob_zero_errors() {
2291        assert!(
2292            CoefficientGroupPrior::PenalizedComplexity { upper: 1.0, tail_prob: 0.0 }
2293                .validate("ctx")
2294                .is_err()
2295        );
2296    }
2297
2298    #[test]
2299    fn prior_penalized_complexity_tail_prob_one_errors() {
2300        assert!(
2301            CoefficientGroupPrior::PenalizedComplexity { upper: 1.0, tail_prob: 1.0 }
2302                .validate("ctx")
2303                .is_err()
2304        );
2305    }
2306
2307    // -----------------------------------------------------------------------
2308    // LatentCLogLogState::new
2309    // -----------------------------------------------------------------------
2310
2311    #[test]
2312    fn latent_cloglog_zero_sd_ok() {
2313        assert!(LatentCLogLogState::new(0.0).is_ok());
2314    }
2315
2316    #[test]
2317    fn latent_cloglog_positive_sd_ok() {
2318        assert!(LatentCLogLogState::new(1.5).is_ok());
2319    }
2320
2321    #[test]
2322    fn latent_cloglog_negative_sd_errors() {
2323        assert!(LatentCLogLogState::new(-0.1).is_err());
2324    }
2325
2326    #[test]
2327    fn latent_cloglog_infinite_sd_errors() {
2328        assert!(LatentCLogLogState::new(f64::INFINITY).is_err());
2329    }
2330
2331    #[test]
2332    fn latent_cloglog_nan_errors() {
2333        assert!(LatentCLogLogState::new(f64::NAN).is_err());
2334    }
2335
2336    // -----------------------------------------------------------------------
2337    // WigglePenaltyConfig::cubic_triple_operator_default
2338    // -----------------------------------------------------------------------
2339
2340    #[test]
2341    fn wiggle_penalty_default_fields() {
2342        let cfg = WigglePenaltyConfig::cubic_triple_operator_default();
2343        assert_eq!(cfg.degree, 3);
2344        assert_eq!(cfg.num_internal_knots, 8);
2345        assert_eq!(cfg.penalty_orders, vec![1, 2, 3]);
2346        assert!(cfg.double_penalty);
2347        assert!((cfg.monotonicity_eps - 1e-4).abs() < 1e-15);
2348    }
2349
2350    // -----------------------------------------------------------------------
2351    // is_valid_tweedie_power
2352    // -----------------------------------------------------------------------
2353
2354    #[test]
2355    fn tweedie_power_valid_interior() {
2356        assert!(is_valid_tweedie_power(1.5));
2357        assert!(is_valid_tweedie_power(1.1));
2358        assert!(is_valid_tweedie_power(1.9));
2359    }
2360
2361    #[test]
2362    fn tweedie_power_boundaries_invalid() {
2363        assert!(!is_valid_tweedie_power(1.0));
2364        assert!(!is_valid_tweedie_power(2.0));
2365    }
2366
2367    #[test]
2368    fn tweedie_power_outside_interval_invalid() {
2369        assert!(!is_valid_tweedie_power(0.5));
2370        assert!(!is_valid_tweedie_power(2.5));
2371        assert!(!is_valid_tweedie_power(-1.0));
2372        assert!(!is_valid_tweedie_power(f64::INFINITY));
2373    }
2374
2375    // -----------------------------------------------------------------------
2376    // StandardLink <-> LinkFunction conversions
2377    // -----------------------------------------------------------------------
2378
2379    #[test]
2380    fn standard_link_roundtrip_to_link_function() {
2381        assert_eq!(StandardLink::Logit.as_link_function(), LinkFunction::Logit);
2382        assert_eq!(StandardLink::Probit.as_link_function(), LinkFunction::Probit);
2383        assert_eq!(StandardLink::CLogLog.as_link_function(), LinkFunction::CLogLog);
2384        assert_eq!(StandardLink::Identity.as_link_function(), LinkFunction::Identity);
2385        assert_eq!(StandardLink::Log.as_link_function(), LinkFunction::Log);
2386    }
2387
2388    #[test]
2389    fn standard_link_from_link_function_state_bearing_errors() {
2390        assert!(StandardLink::try_from(LinkFunction::Sas).is_err());
2391        assert!(StandardLink::try_from(LinkFunction::BetaLogistic).is_err());
2392    }
2393
2394    #[test]
2395    fn standard_link_from_link_function_standard_ok() {
2396        assert_eq!(StandardLink::try_from(LinkFunction::Logit), Ok(StandardLink::Logit));
2397        assert_eq!(StandardLink::try_from(LinkFunction::Log), Ok(StandardLink::Log));
2398    }
2399
2400    // -----------------------------------------------------------------------
2401    // LikelihoodSpec: legal-cell matrix
2402    // -----------------------------------------------------------------------
2403
2404    #[test]
2405    fn legal_cells_accepted() {
2406        assert!(LikelihoodSpec::try_new(
2407            ResponseFamily::Gaussian,
2408            InverseLink::Standard(StandardLink::Identity)
2409        )
2410        .is_ok());
2411        assert!(LikelihoodSpec::try_new(
2412            ResponseFamily::Poisson,
2413            InverseLink::Standard(StandardLink::Log)
2414        )
2415        .is_ok());
2416        assert!(LikelihoodSpec::try_new(
2417            ResponseFamily::Gamma,
2418            InverseLink::Standard(StandardLink::Log)
2419        )
2420        .is_ok());
2421        assert!(LikelihoodSpec::try_new(
2422            ResponseFamily::Beta { phi: 1.0 },
2423            InverseLink::Standard(StandardLink::Logit)
2424        )
2425        .is_ok());
2426        assert!(LikelihoodSpec::try_new(
2427            ResponseFamily::Binomial,
2428            InverseLink::Standard(StandardLink::Logit)
2429        )
2430        .is_ok());
2431        assert!(LikelihoodSpec::try_new(
2432            ResponseFamily::Binomial,
2433            InverseLink::Standard(StandardLink::Probit)
2434        )
2435        .is_ok());
2436        assert!(LikelihoodSpec::try_new(
2437            ResponseFamily::Binomial,
2438            InverseLink::Standard(StandardLink::CLogLog)
2439        )
2440        .is_ok());
2441    }
2442
2443    #[test]
2444    fn illegal_cells_rejected() {
2445        assert!(LikelihoodSpec::try_new(
2446            ResponseFamily::Poisson,
2447            InverseLink::Standard(StandardLink::Identity)
2448        )
2449        .is_err());
2450        assert!(LikelihoodSpec::try_new(
2451            ResponseFamily::Gaussian,
2452            InverseLink::Standard(StandardLink::Logit)
2453        )
2454        .is_err());
2455        assert!(LikelihoodSpec::try_new(
2456            ResponseFamily::Binomial,
2457            InverseLink::Standard(StandardLink::Log)
2458        )
2459        .is_err());
2460        assert!(LikelihoodSpec::try_new(
2461            ResponseFamily::Binomial,
2462            InverseLink::Standard(StandardLink::Identity)
2463        )
2464        .is_err());
2465    }
2466
2467    #[test]
2468    fn likelihood_spec_kind_names() {
2469        assert_eq!(LikelihoodSpec::gaussian_identity().name(), "gaussian");
2470        assert_eq!(LikelihoodSpec::poisson_log().name(), "poisson-log");
2471        assert_eq!(LikelihoodSpec::binomial_logit().name(), "binomial-logit");
2472        assert_eq!(LikelihoodSpec::gamma_log().name(), "gamma-log");
2473    }
2474
2475    // -----------------------------------------------------------------------
2476    // ResponseFamily::infer_from_response
2477    // -----------------------------------------------------------------------
2478
2479    #[test]
2480    fn infer_binary_kind_gives_binomial() {
2481        let y = arr1(&[0.0_f64, 1.0]);
2482        let result = ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Binary);
2483        assert!(matches!(result, Ok(ResponseFamily::Binomial)));
2484    }
2485
2486    #[test]
2487    fn infer_categorical_kind_refuses() {
2488        let y = arr1(&[0.0_f64, 1.0]);
2489        let result = ResponseFamily::infer_from_response(
2490            y.view(),
2491            ResponseColumnKind::Categorical { levels: vec!["yes".to_string(), "no".to_string()] },
2492        );
2493        assert!(result.is_err());
2494    }
2495
2496    #[test]
2497    fn infer_numeric_binary_values_gives_binomial() {
2498        let y = arr1(&[0.0_f64, 1.0, 0.0, 1.0, 0.0]);
2499        let result =
2500            ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2501        assert!(matches!(result, Ok(ResponseFamily::Binomial)));
2502    }
2503
2504    #[test]
2505    fn infer_numeric_count_values_gives_poisson() {
2506        let y = arr1(&[0.0_f64, 1.0, 2.0, 3.0, 5.0]);
2507        let result =
2508            ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2509        assert!(matches!(result, Ok(ResponseFamily::Poisson)));
2510    }
2511
2512    #[test]
2513    fn infer_numeric_fractional_gives_gaussian() {
2514        let y = arr1(&[1.5_f64, 2.3, 3.7]);
2515        let result =
2516            ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2517        assert!(matches!(result, Ok(ResponseFamily::Gaussian)));
2518    }
2519
2520    #[test]
2521    fn infer_numeric_negative_gives_gaussian() {
2522        let y = arr1(&[-1.0_f64, 0.0, 1.0]);
2523        let result =
2524            ResponseFamily::infer_from_response(y.view(), ResponseColumnKind::Numeric);
2525        assert!(matches!(result, Ok(ResponseFamily::Gaussian)));
2526    }
2527
2528    // -----------------------------------------------------------------------
2529    // ResponseFamily::validate_response_support
2530    // -----------------------------------------------------------------------
2531
2532    #[test]
2533    fn gaussian_support_accepts_any_finite() {
2534        let y = arr1(&[-100.0_f64, 0.0, 100.0]);
2535        assert!(ResponseFamily::Gaussian.validate_response_support(y.view()).is_ok());
2536    }
2537
2538    #[test]
2539    fn gamma_support_rejects_zero() {
2540        let y = arr1(&[0.0_f64, 1.0, 2.0]);
2541        assert!(ResponseFamily::Gamma.validate_response_support(y.view()).is_err());
2542    }
2543
2544    #[test]
2545    fn gamma_support_rejects_negative() {
2546        let y = arr1(&[-1.0_f64, 1.0]);
2547        assert!(ResponseFamily::Gamma.validate_response_support(y.view()).is_err());
2548    }
2549
2550    #[test]
2551    fn gamma_support_accepts_positive() {
2552        let y = arr1(&[0.1_f64, 1.0, 100.0]);
2553        assert!(ResponseFamily::Gamma.validate_response_support(y.view()).is_ok());
2554    }
2555
2556    #[test]
2557    fn binomial_support_rejects_fractional() {
2558        let y = arr1(&[0.0_f64, 0.5, 1.0]);
2559        assert!(ResponseFamily::Binomial.validate_response_support(y.view()).is_err());
2560    }
2561
2562    #[test]
2563    fn binomial_support_accepts_binary() {
2564        let y = arr1(&[0.0_f64, 1.0, 0.0, 1.0]);
2565        assert!(ResponseFamily::Binomial.validate_response_support(y.view()).is_ok());
2566    }
2567
2568    #[test]
2569    fn poisson_support_rejects_negative() {
2570        let y = arr1(&[-1.0_f64, 0.0, 1.0]);
2571        assert!(ResponseFamily::Poisson.validate_response_support(y.view()).is_err());
2572    }
2573
2574    #[test]
2575    fn poisson_support_accepts_nonneg() {
2576        let y = arr1(&[0.0_f64, 1.0, 2.0, 10.0]);
2577        assert!(ResponseFamily::Poisson.validate_response_support(y.view()).is_ok());
2578    }
2579
2580    #[test]
2581    fn beta_support_rejects_zero_boundary() {
2582        let y = arr1(&[0.0_f64, 0.5]);
2583        assert!(ResponseFamily::Beta { phi: 1.0 }.validate_response_support(y.view()).is_err());
2584    }
2585
2586    #[test]
2587    fn beta_support_rejects_one_boundary() {
2588        let y = arr1(&[0.5_f64, 1.0]);
2589        assert!(ResponseFamily::Beta { phi: 1.0 }.validate_response_support(y.view()).is_err());
2590    }
2591
2592    #[test]
2593    fn beta_support_accepts_open_interval() {
2594        let y = arr1(&[0.1_f64, 0.5, 0.9]);
2595        assert!(ResponseFamily::Beta { phi: 1.0 }.validate_response_support(y.view()).is_ok());
2596    }
2597
2598    // -----------------------------------------------------------------------
2599    // ResponseFamily::validate_response_degeneracy
2600    // -----------------------------------------------------------------------
2601
2602    #[test]
2603    fn binomial_degeneracy_all_zeros_errors() {
2604        let y = arr1(&[0.0_f64, 0.0, 0.0]);
2605        assert!(ResponseFamily::Binomial.validate_response_degeneracy(y.view()).is_err());
2606    }
2607
2608    #[test]
2609    fn binomial_degeneracy_all_ones_errors() {
2610        let y = arr1(&[1.0_f64, 1.0, 1.0]);
2611        assert!(ResponseFamily::Binomial.validate_response_degeneracy(y.view()).is_err());
2612    }
2613
2614    #[test]
2615    fn binomial_degeneracy_mixed_ok() {
2616        let y = arr1(&[0.0_f64, 1.0, 0.0]);
2617        assert!(ResponseFamily::Binomial.validate_response_degeneracy(y.view()).is_ok());
2618    }
2619
2620    #[test]
2621    fn gaussian_degeneracy_exactly_constant_errors() {
2622        // An exactly-constant response has sample sd 0 ⇒ the marginal
2623        // `−n/2·log σ²` diverges; the guard must reject it pre-fit (#332).
2624        let y = arr1(&[1.0_f64, 1.0, 1.0]);
2625        let err = ResponseFamily::Gaussian
2626            .validate_response_degeneracy(y.view())
2627            .expect_err("exactly-constant Gaussian response must be rejected");
2628        assert!(matches!(
2629            err.kind,
2630            ResponseDegeneracyKind::GaussianNearConstant { .. }
2631        ));
2632    }
2633
2634    #[test]
2635    fn gaussian_degeneracy_near_constant_reproducer_errors() {
2636        // The issue reproducer: a response with sd ~ 1e-13, well below the
2637        // `1e-10` floor, so the REML score blows up to +inf without the guard.
2638        let y = arr1(&[
2639            5.0_f64,
2640            5.0 + 1.0e-13,
2641            5.0 - 1.0e-13,
2642            5.0 + 2.0e-13,
2643            5.0 - 2.0e-13,
2644        ]);
2645        let err = ResponseFamily::Gaussian
2646            .validate_response_degeneracy(y.view())
2647            .expect_err("near-constant Gaussian response must be rejected");
2648        match err.kind {
2649            ResponseDegeneracyKind::GaussianNearConstant { sample_sd, min_sd } => {
2650                assert!(
2651                    sample_sd <= min_sd,
2652                    "guard must fire only when sample_sd ({sample_sd:.3e}) <= min_sd ({min_sd:.0e})"
2653                );
2654                assert_eq!(min_sd, GAUSSIAN_MIN_SAMPLE_SD);
2655                // The message quotes both numbers verbatim.
2656                let msg = err.message_for("y");
2657                assert!(msg.contains("effectively constant"), "msg = {msg}");
2658            }
2659            other => panic!("expected GaussianNearConstant, got {other:?}"),
2660        }
2661    }
2662
2663    #[test]
2664    fn gaussian_degeneracy_well_conditioned_ok() {
2665        // A genuinely varying response (sd ~ O(1)) is never tripped.
2666        let y = arr1(&[-2.0_f64, 0.5, 1.7, 3.0, -1.1, 2.2]);
2667        assert!(ResponseFamily::Gaussian
2668            .validate_response_degeneracy(y.view())
2669            .is_ok());
2670    }
2671
2672    #[test]
2673    fn gaussian_degeneracy_small_signal_above_floor_ok() {
2674        // sd ~ 1e-6 is small but far above the 1e-10 floor: a legitimately
2675        // small-but-real signal (e.g. a finely-resolved measurement) must fit,
2676        // so the guard must not over-reject.
2677        let y = arr1(&[1.0_f64, 1.0 + 1.0e-6, 1.0 - 1.0e-6, 1.0 + 2.0e-6]);
2678        assert!(ResponseFamily::Gaussian
2679            .validate_response_degeneracy(y.view())
2680            .is_ok());
2681    }
2682
2683    #[test]
2684    fn gaussian_degeneracy_single_observation_ok() {
2685        // Fewer than two observations carries no estimable scale degeneracy;
2686        // the sample-size gate handles too-small data separately.
2687        let y = arr1(&[42.0_f64]);
2688        assert!(ResponseFamily::Gaussian
2689            .validate_response_degeneracy(y.view())
2690            .is_ok());
2691    }
2692
2693    // -----------------------------------------------------------------------
2694    // ResponseFamily::mean_clamp_bounds / response_support_bounds
2695    // -----------------------------------------------------------------------
2696
2697    #[test]
2698    fn mean_clamp_bounds_binomial_unit_interval() {
2699        assert_eq!(ResponseFamily::Binomial.mean_clamp_bounds(), Some((0.0, 1.0)));
2700    }
2701
2702    #[test]
2703    fn mean_clamp_bounds_gaussian_none() {
2704        assert_eq!(ResponseFamily::Gaussian.mean_clamp_bounds(), None);
2705    }
2706
2707    #[test]
2708    fn mean_clamp_bounds_poisson_none() {
2709        assert_eq!(ResponseFamily::Poisson.mean_clamp_bounds(), None);
2710    }
2711
2712    #[test]
2713    fn response_support_bounds_gamma_nonneg_to_inf() {
2714        assert_eq!(
2715            ResponseFamily::Gamma.response_support_bounds(),
2716            Some((0.0, f64::INFINITY))
2717        );
2718    }
2719
2720    #[test]
2721    fn response_support_bounds_binomial_unit_interval() {
2722        assert_eq!(ResponseFamily::Binomial.response_support_bounds(), Some((0.0, 1.0)));
2723    }
2724
2725    #[test]
2726    fn response_support_bounds_gaussian_none() {
2727        assert_eq!(ResponseFamily::Gaussian.response_support_bounds(), None);
2728    }
2729
2730    // -----------------------------------------------------------------------
2731    // ResponseSupportViolation::message_for
2732    // -----------------------------------------------------------------------
2733
2734    #[test]
2735    fn violation_message_names_column() {
2736        let y = arr1(&[-1.0_f64]);
2737        let err = ResponseFamily::Gamma
2738            .validate_response_support(y.view())
2739            .unwrap_err();
2740        let msg = err.message_for("my_column");
2741        assert!(msg.contains("my_column"), "message: {msg}");
2742        assert!(msg.contains("Gamma"), "message: {msg}");
2743    }
2744
2745    // -----------------------------------------------------------------------
2746    // inverse_link_to_binomial_spec
2747    // -----------------------------------------------------------------------
2748
2749    #[test]
2750    fn binomial_spec_from_logit_link_ok() {
2751        let link = InverseLink::Standard(StandardLink::Logit);
2752        assert!(inverse_link_to_binomial_spec(&link).is_ok());
2753    }
2754
2755    #[test]
2756    fn binomial_spec_from_log_link_errors() {
2757        let link = InverseLink::Standard(StandardLink::Log);
2758        assert!(inverse_link_to_binomial_spec(&link).is_err());
2759    }
2760
2761    #[test]
2762    fn binomial_spec_from_identity_link_errors() {
2763        let link = InverseLink::Standard(StandardLink::Identity);
2764        assert!(inverse_link_to_binomial_spec(&link).is_err());
2765    }
2766
2767    // -----------------------------------------------------------------------
2768    // FamilySpecKind::name / pretty_name
2769    // -----------------------------------------------------------------------
2770
2771    #[test]
2772    fn family_spec_kind_name_non_binomial_variants() {
2773        assert_eq!(FamilySpecKind::GaussianIdentity.name(), "gaussian");
2774        assert_eq!(FamilySpecKind::PoissonLog.name(), "poisson-log");
2775        assert_eq!(FamilySpecKind::GammaLog.name(), "gamma-log");
2776        assert_eq!(FamilySpecKind::TweedieLog { p: 1.5 }.name(), "tweedie-log");
2777        assert_eq!(
2778            FamilySpecKind::NegativeBinomialLog { theta: 2.0 }.name(),
2779            "negative-binomial-log"
2780        );
2781        assert_eq!(FamilySpecKind::BetaLogit { phi: 5.0 }.name(), "beta-regression-logit");
2782        assert_eq!(FamilySpecKind::RoystonParmar.name(), "royston-parmar");
2783    }
2784
2785    #[test]
2786    fn family_spec_kind_name_binomial_variants() {
2787        assert_eq!(FamilySpecKind::BinomialLogit.name(), "binomial-logit");
2788        assert_eq!(FamilySpecKind::BinomialProbit.name(), "binomial-probit");
2789        assert_eq!(FamilySpecKind::BinomialCLogLog.name(), "binomial-cloglog");
2790    }
2791
2792    #[test]
2793    fn family_spec_kind_pretty_name_gaussian() {
2794        assert_eq!(FamilySpecKind::GaussianIdentity.pretty_name(), "Gaussian Identity");
2795    }
2796
2797    #[test]
2798    fn family_spec_kind_pretty_name_binomial_logit() {
2799        assert_eq!(FamilySpecKind::BinomialLogit.pretty_name(), "Binomial Logit");
2800    }
2801
2802    // -----------------------------------------------------------------------
2803    // FamilySpecKind::is_binomial and companions
2804    // -----------------------------------------------------------------------
2805
2806    #[test]
2807    fn is_binomial_true_for_all_binomial_variants() {
2808        assert!(FamilySpecKind::BinomialLogit.is_binomial());
2809        assert!(FamilySpecKind::BinomialProbit.is_binomial());
2810        assert!(FamilySpecKind::BinomialCLogLog.is_binomial());
2811    }
2812
2813    #[test]
2814    fn is_binomial_false_for_non_binomial_variants() {
2815        assert!(!FamilySpecKind::GaussianIdentity.is_binomial());
2816        assert!(!FamilySpecKind::PoissonLog.is_binomial());
2817        assert!(!FamilySpecKind::GammaLog.is_binomial());
2818        assert!(!FamilySpecKind::RoystonParmar.is_binomial());
2819        assert!(!FamilySpecKind::TweedieLog { p: 1.5 }.is_binomial());
2820        assert!(!FamilySpecKind::NegativeBinomialLog { theta: 1.0 }.is_binomial());
2821        assert!(!FamilySpecKind::BetaLogit { phi: 1.0 }.is_binomial());
2822    }
2823
2824    #[test]
2825    fn is_gaussian_identity_true_only_for_gaussian() {
2826        assert!(FamilySpecKind::GaussianIdentity.is_gaussian_identity());
2827        assert!(!FamilySpecKind::PoissonLog.is_gaussian_identity());
2828        assert!(!FamilySpecKind::BinomialLogit.is_gaussian_identity());
2829    }
2830
2831    #[test]
2832    fn is_royston_parmar_true_only_for_royston_parmar() {
2833        assert!(FamilySpecKind::RoystonParmar.is_royston_parmar());
2834        assert!(!FamilySpecKind::GaussianIdentity.is_royston_parmar());
2835        assert!(!FamilySpecKind::BinomialLogit.is_royston_parmar());
2836    }
2837
2838    #[test]
2839    fn supports_firth_iff_is_binomial() {
2840        assert!(FamilySpecKind::BinomialLogit.supports_firth());
2841        assert!(FamilySpecKind::BinomialProbit.supports_firth());
2842        assert!(FamilySpecKind::BinomialCLogLog.supports_firth());
2843        assert!(!FamilySpecKind::GaussianIdentity.supports_firth());
2844        assert!(!FamilySpecKind::PoissonLog.supports_firth());
2845        assert!(!FamilySpecKind::GammaLog.supports_firth());
2846        assert!(!FamilySpecKind::RoystonParmar.supports_firth());
2847    }
2848}
gam_spec/lib.rs

gam_spec/
lib.rs