gam_models/fit_orchestration/request.rs
1use super::*;
2
/// Settings for a link "wiggle" block: basis size plus penalty layout.
///
/// Carried as the optional `wiggle` field of several request structs in this
/// file and embedded in [`StandardBinomialWiggleConfig`].
#[derive(Clone, Debug)]
pub struct LinkWiggleConfig {
    /// Degree of the wiggle basis (presumably a spline degree — confirm
    /// against the basis builder).
    pub degree: usize,
    /// Number of internal knots of the wiggle basis.
    pub num_internal_knots: usize,
    /// Penalty orders to apply; presumably one penalty per listed order —
    /// TODO confirm against the consumer.
    pub penalty_orders: Vec<usize>,
    /// Whether a double penalty is requested. NOTE(review): the exact
    /// meaning (e.g. an additional null-space penalty) is not visible in
    /// this file — confirm.
    pub double_penalty: bool,
}
10
/// Configuration for the second-stage binomial-mean wiggle fit appended to a
/// standard pilot. The blockwise refit options live inside this struct so the
/// pilot config (`link_kind` + `wiggle`) and its required `refit_options` can
/// never disagree: either the whole standard-wiggle request is `Some`, or it
/// is `None`. The previous shape had two sibling `Option` fields on
/// `StandardFitRequest`, which allowed the materialize path to construct an
/// inconsistent state (#320: linkwiggle config without blockwise options).
#[derive(Clone)]
pub struct StandardBinomialWiggleConfig {
    /// Inverse link of the pilot; one half of the pilot config.
    pub link_kind: InverseLink,
    /// Wiggle basis/penalty settings; the other half of the pilot config.
    pub wiggle: LinkWiggleConfig,
    /// Blockwise refit options that must always accompany the pilot config.
    pub refit_options: BlockwiseFitOptions,
}
24
/// Request payload for the [`FitRequest::Standard`] variant.
///
/// Unlike the other request structs in this file, the data matrix here is an
/// owned `Array2` rather than a borrowed `ArrayView2`; the lifetime parameter
/// is kept alive only through `_marker`.
pub struct StandardFitRequest<'a> {
    /// Owned 2-D data matrix (presumably one row per observation — confirm).
    pub data: Array2<f64>,
    /// Response vector.
    pub y: Array1<f64>,
    /// Per-observation weights.
    pub weights: Array1<f64>,
    /// Per-observation offset.
    pub offset: Array1<f64>,
    /// Term collection describing the model terms.
    pub spec: TermCollectionSpec,
    /// Likelihood specification for the fit.
    pub family: LikelihoodSpec,
    /// #2026: estimate the Tweedie variance power `p` by profile likelihood
    /// (mgcv `tw()` semantics) before the final fit, rather than trusting the
    /// `p` baked into `family`. Set only for a bare `family="tweedie"`/`"tw"`
    /// request that named no explicit power; an explicit `tweedie(1.6)` pins `p`
    /// and leaves this `false`. When `true`, `family` must carry
    /// `ResponseFamily::Tweedie` on a log link (the placeholder power is
    /// overwritten with the estimate).
    pub estimate_tweedie_p: bool,
    /// General fit options.
    pub options: FitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
    /// Optional second-stage binomial-mean wiggle fit; `None` means no
    /// standard-wiggle request (see [`StandardBinomialWiggleConfig`]).
    pub wiggle: Option<StandardBinomialWiggleConfig>,
    /// User-defined coefficient groups.
    pub coefficient_groups: Vec<CoefficientGroupSpec>,
    /// Gamma precision hyperpriors per penalty block. Presumably
    /// `(block label, shape, rate)`, mirroring
    /// `FitConfig::penalty_block_gamma_priors` — TODO confirm tuple order.
    pub penalty_block_gamma_priors: Vec<(String, f64, f64)>,
    /// Optional latent-coordinate configuration.
    pub latent_coord: Option<StandardLatentCoordConfig>,
    /// Zero-sized marker that uses the `'a` lifetime parameter; none of the
    /// visible field types above mention `'a`.
    #[doc(hidden)]
    pub _marker: std::marker::PhantomData<&'a ()>,
}
49
/// Request payload for the [`FitRequest::GaussianLocationScale`] variant.
pub struct GaussianLocationScaleFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the Gaussian location-scale model.
    pub spec: GaussianLocationScaleTermSpec,
    /// Optional link-wiggle settings; `None` means no wiggle is requested.
    pub wiggle: Option<LinkWiggleConfig>,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
}
57
/// Request payload for the [`FitRequest::BinomialLocationScale`] variant.
pub struct BinomialLocationScaleFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the binomial location-scale model.
    pub spec: BinomialLocationScaleTermSpec,
    /// Optional link-wiggle settings; `None` means no wiggle is requested.
    pub wiggle: Option<LinkWiggleConfig>,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
}
65
/// Request payload for the [`FitRequest::DispersionLocationScale`] variant.
///
/// Carries no `wiggle` field, unlike the Gaussian and binomial
/// location-scale requests.
pub struct DispersionLocationScaleFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the dispersion-GLM location-scale model.
    pub spec: DispersionGlmLocationScaleTermSpec,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
}
72
/// Request payload for the [`FitRequest::SurvivalLocationScale`] variant.
///
/// There is no `options: BlockwiseFitOptions` field here: per the note on
/// `cache_session`, those options are constructed internally by
/// `fit_survival_location_scale_model`.
pub struct SurvivalLocationScaleFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the survival location-scale model.
    pub spec: SurvivalLocationScaleTermSpec,
    /// Optional link-wiggle settings; `None` means no wiggle is requested.
    pub wiggle: Option<LinkWiggleConfig>,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
    /// Whether the inverse link should be optimized as part of the fit.
    /// NOTE(review): which link parameters this covers is not visible
    /// here — confirm.
    pub optimize_inverse_link: bool,
    /// See [`gam_custom_family::BlockwiseFitOptions::cache_session`].
    /// Threaded into the internally constructed `BlockwiseFitOptions` by
    /// `fit_survival_location_scale_model`.
    pub cache_session: Option<std::sync::Arc<gam_runtime::warm_start::Session>>,
}
84
/// Request payload for the [`FitRequest::SurvivalTransformation`] variant.
pub struct SurvivalTransformationFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the survival transformation model.
    pub spec: SurvivalTransformationTermSpec,
    /// See [`gam_custom_family::BlockwiseFitOptions::cache_session`].
    /// Threaded into the internally constructed `BlockwiseFitOptions` by
    /// `fit_survival_transformation_model`.
    pub cache_session: Option<std::sync::Arc<gam_runtime::warm_start::Session>>,
}
93
/// Model specification carried by [`SurvivalTransformationFitRequest`].
// NOTE(review): most field meanings below are inferred from names and types
// only; confirm against `fit_survival_transformation_model`.
#[derive(Clone)]
pub struct SurvivalTransformationTermSpec {
    /// Entry ages/times (presumably one per observation — confirm).
    pub age_entry: Array1<f64>,
    /// Exit ages/times (presumably one per observation — confirm).
    pub age_exit: Array1<f64>,
    /// Event indicator stored as `u8` (presumably 0/1 — confirm encoding).
    pub event_target: Array1<u8>,
    /// Per-observation weights.
    pub weights: Array1<f64>,
    /// Term collection for the covariate side of the model.
    pub covariate_spec: TermCollectionSpec,
    /// Offset applied on the covariate side.
    pub covariate_offset: Array1<f64>,
    /// Survival baseline configuration.
    pub baseline_cfg: crate::survival::SurvivalBaselineConfig,
    /// Survival likelihood mode.
    pub likelihood_mode: crate::survival::SurvivalLikelihoodMode,
    /// Anchor value for the time basis. NOTE(review): units/scale not
    /// visible here — confirm.
    pub time_anchor: f64,
    /// Output of the survival time-basis build step.
    pub time_build: crate::survival::SurvivalTimeBuildOutput,
    /// Optional time-wiggle formula specification.
    pub timewiggle: Option<LinkWiggleFormulaSpec>,
    /// Optional pair of Weibull seed values. NOTE(review): the order of the
    /// two components (e.g. scale/shape) is not visible here — confirm.
    pub weibull_seed: Option<(f64, f64)>,
    /// Ridge penalty strength.
    pub ridge_lambda: f64,
    /// Gamma precision hyperpriors per penalty block. Presumably
    /// `(block label, shape, rate)`, mirroring
    /// `FitConfig::penalty_block_gamma_priors` — TODO confirm tuple order.
    pub penalty_block_gamma_priors: Vec<(String, f64, f64)>,
}
/// Request payload for the [`FitRequest::BernoulliMarginalSlope`] variant.
pub struct BernoulliMarginalSlopeFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the Bernoulli marginal-slope model.
    pub spec: BernoulliMarginalSlopeTermSpec,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
    /// Resource policy for this fit. This is the only request struct in the
    /// file that carries one explicitly.
    pub policy: gam_runtime::resource::ResourcePolicy,
}
118
/// Request payload for the [`FitRequest::SurvivalMarginalSlope`] variant.
pub struct SurvivalMarginalSlopeFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the survival marginal-slope model.
    pub spec: SurvivalMarginalSlopeTermSpec,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
}
/// Request payload for the [`FitRequest::LatentSurvival`] variant.
pub struct LatentSurvivalFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the latent survival model.
    pub spec: LatentSurvivalTermSpec,
    /// Frailty specification (required here, not optional).
    pub frailty: FrailtySpec,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
}
131
/// Request payload for the [`FitRequest::LatentBinary`] variant.
pub struct LatentBinaryFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Term specification for the latent binary model.
    pub spec: LatentBinaryTermSpec,
    /// Frailty specification (required here, not optional).
    pub frailty: FrailtySpec,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
}
138
/// Request payload for the [`FitRequest::TransformationNormal`] variant.
pub struct TransformationNormalFitRequest<'a> {
    /// Borrowed 2-D data matrix.
    pub data: ArrayView2<'a, f64>,
    /// Owned response vector.
    pub response: Array1<f64>,
    /// Per-observation weights.
    pub weights: Array1<f64>,
    /// Per-observation offset.
    pub offset: Array1<f64>,
    /// Term collection for the covariate side of the model.
    pub covariate_spec: TermCollectionSpec,
    /// Transformation-normal configuration.
    pub config: TransformationNormalConfig,
    /// Blockwise fit options.
    pub options: BlockwiseFitOptions,
    /// Options for spatial length-scale optimization.
    pub kappa_options: SpatialLengthScaleOptimizationOptions,
    /// Optional warm-start state; `None` means no warm start is supplied.
    pub warm_start: Option<TransformationWarmStart>,
}
/// A fully specified fit request, one variant per model kind.
///
/// Each variant wraps the request struct of the same name; `'a` is the
/// lifetime handed to those structs (most of them borrow the data matrix as
/// an `ArrayView2<'a, f64>`).
pub enum FitRequest<'a> {
    /// Standard fit; see [`StandardFitRequest`].
    Standard(StandardFitRequest<'a>),
    /// Gaussian location-scale fit.
    GaussianLocationScale(GaussianLocationScaleFitRequest<'a>),
    /// Binomial location-scale fit.
    BinomialLocationScale(BinomialLocationScaleFitRequest<'a>),
    /// Dispersion-channel location-scale fit.
    DispersionLocationScale(DispersionLocationScaleFitRequest<'a>),
    /// Survival location-scale fit.
    SurvivalLocationScale(SurvivalLocationScaleFitRequest<'a>),
    /// Survival transformation fit.
    SurvivalTransformation(SurvivalTransformationFitRequest<'a>),
    /// Bernoulli marginal-slope fit.
    BernoulliMarginalSlope(BernoulliMarginalSlopeFitRequest<'a>),
    /// Survival marginal-slope fit.
    SurvivalMarginalSlope(SurvivalMarginalSlopeFitRequest<'a>),
    /// Latent survival fit.
    LatentSurvival(LatentSurvivalFitRequest<'a>),
    /// Latent binary fit.
    LatentBinary(LatentBinaryFitRequest<'a>),
    /// Transformation-normal fit.
    TransformationNormal(TransformationNormalFitRequest<'a>),
}
163
/// Result payload for the [`FitResult::Standard`] variant.
pub struct StandardFitResult {
    /// The unified fit result.
    pub fit: UnifiedFitResult,
    /// Design built from the term collection.
    pub design: TermCollectionDesign,
    /// Term collection spec as resolved for this fit (presumably after
    /// data-dependent choices were frozen — confirm).
    pub resolvedspec: TermCollectionSpec,
    /// Adaptive-regularization diagnostics, when available.
    pub adaptive_diagnostics: Option<AdaptiveRegularizationDiagnostics>,
    /// Timing of the spatial length-scale optimization, when available.
    pub kappa_timing: Option<SpatialLengthScaleOptimizationTiming>,
    /// Fitted link state to persist with the model.
    pub saved_link_state: FittedLinkState,
    /// Knots of the link-wiggle basis, when present (presumably `Some` only
    /// for wiggle fits — confirm).
    pub wiggle_knots: Option<Array1<f64>>,
    /// Degree of the link-wiggle basis, when present.
    pub wiggle_degree: Option<usize>,
    /// Standard-basis link-warp coefficients `β_w = Z·γ` for the saved-model
    /// predict runtime when the frozen-basis de-aliasing engaged (#1596). The
    /// fit's coefficients stay in the reduced `γ` coordinate; this lift is
    /// persisted into the payload's `beta_link_wiggle`.
    pub wiggle_saved_warp_beta: Option<Vec<f64>>,
}
179
/// Result payload for the [`FitResult::SurvivalLocationScale`] variant.
pub struct SurvivalLocationScaleFitResult {
    /// The survival location-scale term fit.
    pub fit: SurvivalLocationScaleTermFitResult,
    /// Inverse link associated with the fit (presumably the final link
    /// after any link optimization — confirm).
    pub inverse_link: InverseLink,
    /// Knots of the link-wiggle basis, when present.
    pub wiggle_knots: Option<Array1<f64>>,
    /// Degree of the link-wiggle basis, when present.
    pub wiggle_degree: Option<usize>,
}
186
/// Result payload for the [`FitResult::SurvivalTransformation`] variant.
pub struct SurvivalTransformationFitResult {
    /// The unified fit result.
    pub fit: UnifiedFitResult,
    /// Term collection spec as resolved for this fit.
    pub resolvedspec: TermCollectionSpec,
    /// Survival baseline configuration used by the fit.
    pub baseline_cfg: crate::survival::SurvivalBaselineConfig,
    /// Survival likelihood mode used by the fit.
    pub likelihood_mode: crate::survival::SurvivalLikelihoodMode,
    /// Persistable snapshot of the time basis used during the fit. Replaces
    /// six previously flat fields (basisname / degree / knots / keep_cols /
    /// smooth_lambda / anchor) so the FFI save path consumes a single
    /// source-of-truth value rather than threading siblings independently.
    pub time_basis: crate::survival::SavedSurvivalTimeBasis,
    /// Column count of the base time design (presumably excluding any
    /// time-wiggle columns — confirm).
    pub time_base_ncols: usize,
    /// Baseline time-wiggle block input, when a time wiggle was used.
    pub baseline_timewiggle: Option<TimeWiggleBlockInput>,
}
200
/// Outcome of a fit, one variant per result kind.
///
/// The first eleven variants correspond one-to-one with the [`FitRequest`]
/// variants of the same name. `SplineScan` and `ResidualCascade` have no
/// matching request variant; they describe alternative fast-path
/// representations (see their variant docs).
pub enum FitResult {
    /// Result of a standard fit.
    Standard(StandardFitResult),
    /// Result of a Gaussian location-scale fit.
    GaussianLocationScale(GaussianLocationScaleFitResult),
    /// Result of a binomial location-scale fit.
    BinomialLocationScale(BinomialLocationScaleFitResult),
    /// Result of a dispersion-channel location-scale fit.
    DispersionLocationScale(DispersionLocationScaleFitResult),
    /// Result of a survival location-scale fit.
    SurvivalLocationScale(SurvivalLocationScaleFitResult),
    /// Result of a survival transformation fit.
    SurvivalTransformation(SurvivalTransformationFitResult),
    /// Result of a Bernoulli marginal-slope fit.
    BernoulliMarginalSlope(BernoulliMarginalSlopeFitResult),
    /// Result of a survival marginal-slope fit.
    SurvivalMarginalSlope(SurvivalMarginalSlopeFitResult),
    /// Result of a latent survival fit.
    LatentSurvival(LatentSurvivalTermFitResult),
    /// Result of a latent binary fit.
    LatentBinary(LatentBinaryTermFitResult),
    /// Result of a transformation-normal fit.
    TransformationNormal(TransformationNormalFitResult),
    /// Exact O(n) state-space cubic/linear/quintic smoothing-spline scan
    /// (#1030/#1034). A scan-bearing model IS a Gaussian-identity model with a
    /// different (exact) representation: rather than a dense design + coefficient
    /// vector it carries the Durbin–Koopman smoother posterior directly (knots,
    /// smoothed states, pointwise variances, σ², log λ, exact diffuse-REML EDF,
    /// and an exact per-row `predict`). Library callers that want the fitted
    /// posterior get it here without paying the dense O(n·k²)+O(k³) route; the
    /// CLI/FFI save paths build the persistence payload from the same
    /// `SplineScanFit` via `assemble_spline_scan_payload`.
    SplineScan(gam_solve::spline_scan::SplineScanFit),
    /// O(n log n) multiresolution residual-cascade smooth (#1032). UNLIKE the
    /// 1-D scan, the cascade is NOT the same posterior as the Duchon/Matérn term
    /// it stands in for (a different finite basis — the multilevel Wendland
    /// frame), so it is never a silent swap: this variant is produced only when
    /// the structural detector [`residual_cascade_fast_path`] fires on an
    /// eligible scattered-low-d Gaussian fit past the dense-kernel cliff AND the
    /// in-cascade quasi-uniformity guard certifies the metric; every other shape
    /// (and a rejected metric) falls through to the dense `fit_model` path. The
    /// cascade-bearing model carries the
    /// [`ResidualCascadeFit`](gam_solve::residual_cascade::ResidualCascadeFit)
    /// directly — knots-free nested geometry, coefficients, the factored
    /// precision, and an exact per-row `predict`; the CLI/FFI save paths build
    /// the persistence payload from its `to_state` snapshot.
    ResidualCascade(gam_solve::residual_cascade::ResidualCascadeFit),
}
238
/// Result of a dispersion-channel GAMLSS location-scale fit (#913). Wraps the
/// shared two-block [`BlockwiseTermFitResult`] (mean + log-precision designs
/// and coefficients) plus the family kind so the save path can stamp the right
/// likelihood. These families have no link-wiggle and no response
/// standardization, so the result is a thin wrapper.
pub struct DispersionLocationScaleFitResult {
    /// Shared two-block fit result (mean + log-precision).
    pub fit: BlockwiseTermFitResult,
    /// Which dispersion family was fitted; used by the save path to stamp
    /// the likelihood.
    pub kind: DispersionFamilyKind,
}
248
/// Out-of-fold Stage-1 latent score and its score-influence Jacobian for a
/// CTN → marginal-slope chain. `z_oof` (length n) replaces the in-sample `z`
/// the Stage-2 model consumes; `jac_oof` (n × p₁) is fed to the Stage-2 spec's
/// `score_influence_jacobian` so the joint solve absorbs the realized leakage
/// directions `Z_infl = diag(s_f·β̂₀)·J`.
pub struct CrossFitScoreCalibration {
    /// Out-of-fold latent score, length n.
    pub z_oof: Array1<f64>,
    /// Out-of-fold score-influence Jacobian, n × p₁.
    pub jac_oof: Array2<f64>,
}
258
259/// Internal recipe describing the CTN Stage-1 fit that produced a Stage-2 `z`
260/// column. This is in-process plumbing — never a CLI flag, env var, or feature
261/// gate. The orchestration layer populates [`FitConfig::ctn_stage1`] when (and
262/// only when) the marginal-slope `z` was generated by a transformation-normal
263/// Stage-1 fit; its presence is the sole auto-enable signal for cross-fitted
264/// orthogonalization (design §5). When absent, Stage-2 falls back to the free
265/// 1-D `score_warp` spline (which spans only the x-free leakage column).
266#[derive(Clone, Debug)]
267pub struct CtnStage1Recipe {
268 /// Stage-1 response column name (the `y` the CTN transforms).
269 pub response_column: String,
270 /// Stage-1 covariate-side formula right-hand side (e.g. `"s(pc1) + s(pc2)"`),
271 /// with no `~` and no response symbol. [`crossfit_score_calibration`] parses
272 /// it and builds the CTN covariate basis exactly as
273 /// `materialize_transformation_normal` does, then FREEZES that basis once on
274 /// the full data and reuses the frozen spec for every fold's refit — so the
275 /// rebuilt covariate design has an identical column geometry across folds,
276 /// keeping `J`'s `p₁ = p_resp · p_cov` columns aligned (design §3).
277 ///
278 /// The recipe carries the formula RHS (a primitive string) rather than a
279 /// resolved [`TermCollectionSpec`] because this struct is populated both via
280 /// [`CtnStage1Recipe::new`] (set on [`FitConfig::ctn_stage1`], then
281 /// [`fit_from_formula`]) and by the gamfit FFI marshaller
282 /// (`gamfit/_calibrated_slope.py`), which can only serialize primitives over
283 /// the JSON boundary — a `TermCollectionSpec` is not serializable. Freezing on
284 /// the full Stage-2 data is equivalent to
285 /// freezing on the Stage-1 data whenever the two stages share a frame (the
286 /// calibrated-chain contract), so the column geometry still matches Stage-1.
287 pub covariate_formula_rhs: String,
288 /// Stage-1 CTN config (response basis degree / knot count / penalties).
289 /// Its `response_num_internal_knots` is the FIXED response-basis size; the
290 /// cross-fit pins it across folds so `p_resp` (and hence `p₁`) is
291 /// fold-invariant (design §3).
292 pub config: TransformationNormalConfig,
293 /// Optional Stage-1 weight column name.
294 pub weight_column: Option<String>,
295 /// Optional Stage-1 offset column name.
296 pub offset_column: Option<String>,
297}
298
299impl CtnStage1Recipe {
300 /// Build a Stage-1 CTN recipe from the Stage-1 description. This is the public
301 /// way to populate [`FitConfig::ctn_stage1`] — set it on a marginal-slope
302 /// config and run [`fit_from_formula`] (the entry IS `fit_from_formula` with
303 /// `ctn_stage1` set; there is no separate combined entry function). The
304 /// materializer then cross-fits the CTN and installs the leakage-projection
305 /// block; supplying the recipe *is* the request for orthogonalization.
306 ///
307 /// `response` is the Stage-1 CTN response column; `covariates` is the
308 /// covariate-side formula right-hand side (e.g. `"s(pc1) + s(pc2)"` — no `~`,
309 /// no response symbol). Validates both are non-empty and that `covariates`
310 /// is an RHS only.
311 pub fn new(
312 response: &str,
313 covariates: &str,
314 config: TransformationNormalConfig,
315 weight_column: Option<&str>,
316 offset_column: Option<&str>,
317 ) -> Result<Self, String> {
318 let response_column = response.trim().to_string();
319 if response_column.is_empty() {
320 return Err("CtnStage1Recipe requires a non-empty Stage-1 response column".to_string());
321 }
322 let covariate_formula_rhs = covariates.trim().to_string();
323 if covariate_formula_rhs.is_empty() {
324 return Err(
325 "CtnStage1Recipe requires a non-empty Stage-1 covariate formula RHS".to_string(),
326 );
327 }
328 if covariate_formula_rhs.contains('~') {
329 return Err(
330 "CtnStage1Recipe covariates is a right-hand side only; pass 's(pc1) + s(pc2)', \
331 not 'score ~ s(pc1) + s(pc2)'"
332 .to_string(),
333 );
334 }
335 Ok(Self {
336 response_column,
337 covariate_formula_rhs,
338 config,
339 weight_column: weight_column
340 .map(str::to_string)
341 .filter(|s| !s.trim().is_empty()),
342 offset_column: offset_column
343 .map(str::to_string)
344 .filter(|s| !s.trim().is_empty()),
345 })
346 }
347}
/// Formula-level fit configuration: family/link selection, survival and
/// location-scale options, marginal-slope inputs, fitting knobs, and
/// pass-through JSON descriptors from the Python bridge.
///
/// `FitConfig::default()` supplies a value for every field, so callers can
/// override a few fields with struct-update syntax.
#[derive(Clone, Debug)]
pub struct FitConfig {
    /// Family: "gaussian", "binomial", "poisson", "negative-binomial",
    /// "gamma", "tweedie" (alias "tw"; variance power fixed at p = 1.5), or
    /// None for auto-detect. `"expectile"` / `"expectile(τ)"` is also
    /// accepted (see `expectile_tau`).
    ///
    /// NOTE(review): the "fixed at p = 1.5" wording appears to conflict with
    /// the docs on `StandardFitRequest::estimate_tweedie_p`, which say a bare
    /// `"tweedie"`/`"tw"` request has `p` estimated by profile likelihood and
    /// only an explicit power such as `tweedie(1.6)` pins it — confirm which
    /// is current.
    pub family: Option<String>,
    /// Fixed size/overdispersion parameter for `family="negative-binomial"`.
    pub negative_binomial_theta: Option<f64>,
    /// Link: "identity", "logit", "probit", "cloglog", "sas", "beta-logistic", or None.
    pub link: Option<String>,
    /// Whether to use flexible (wiggle-augmented) link.
    pub flexible_link: bool,
    /// Optional additive offset column for the primary linear predictor.
    pub offset_column: Option<String>,
    /// Optional additive offset column for the noise/log-scale predictor.
    pub noise_offset_column: Option<String>,
    /// Optional family-level frailty modifier.
    pub frailty: Option<FrailtySpec>,

    // Survival-specific
    /// Baseline target: "linear", "weibull", "gompertz", "gompertz-makeham".
    pub baseline_target: String,
    /// Optional baseline scale parameter. NOTE(review): which baseline
    /// targets consume it is not visible here — confirm.
    pub baseline_scale: Option<f64>,
    /// Optional baseline shape parameter (same caveat as `baseline_scale`).
    pub baseline_shape: Option<f64>,
    /// Optional baseline rate parameter (same caveat as `baseline_scale`).
    pub baseline_rate: Option<f64>,
    /// Optional baseline Makeham parameter (presumably for
    /// "gompertz-makeham" — confirm).
    pub baseline_makeham: Option<f64>,
    /// Time basis: "ispline" or "none".
    pub time_basis: String,
    /// Degree of the time basis.
    pub time_degree: usize,
    /// Number of internal knots of the time basis.
    pub time_num_internal_knots: usize,
    /// Smoothing parameter for the time basis.
    pub time_smooth_lambda: f64,
    /// Survival likelihood mode: "location-scale", "transformation", "weibull",
    /// "marginal-slope", "latent", or "latent-binary".
    pub survival_likelihood: String,
    /// Residual distribution: "gaussian", "logistic", "gumbel".
    pub survival_distribution: String,
    /// Optional basis size `k` for the time-varying threshold. NOTE(review):
    /// inferred from the name; confirm how `None` is interpreted.
    pub threshold_time_k: Option<usize>,
    /// Degree of the time-varying threshold basis.
    pub threshold_time_degree: usize,
    /// Optional basis size `k` for the time-varying sigma (same caveat as
    /// `threshold_time_k`).
    pub sigma_time_k: Option<usize>,
    /// Degree of the time-varying sigma basis.
    pub sigma_time_degree: usize,

    // Location-scale (GAMLSS)
    /// If set, fit a location-scale model with this formula for the noise parameter.
    pub noise_formula: Option<String>,

    // Marginal-slope
    /// Formula for the log-slope model (survival marginal-slope or Bernoulli marginal-slope).
    pub logslope_formula: Option<String>,
    /// Column name for the z (exposure/dose) variable in marginal-slope models.
    pub z_column: Option<String>,
    /// Optional non-negative per-row training weights column.
    pub weight_column: Option<String>,
    /// Expectile asymmetry `τ ∈ (0, 1)` for `family = "expectile"`.
    ///
    /// When `family` resolves to `"expectile"` the fit minimizes the
    /// Newey–Powell asymmetric squared loss `Σ wᵢ(τ)·(yᵢ − μᵢ)²` with
    /// `wᵢ(τ) = τ` if `yᵢ > μᵢ` else `1 − τ`, tracing the conditional
    /// `τ`-expectile — the smooth analogue of the `τ`-quantile. `τ = 0.5`
    /// reduces exactly to the Gaussian-identity mean fit. The whole penalized
    /// smooth + REML `λ`-selection machinery is reused via a Least
    /// Asymmetrically Weighted Squares (LAWS) outer loop. `None` defaults to
    /// the median expectile `τ = 0.5` when the family is `"expectile"`; it is
    /// ignored for every other family. The asymmetry may also be written inline
    /// as `family = "expectile(0.9)"`, which fills this field at resolve time.
    pub expectile_tau: Option<f64>,
    /// Internal CTN Stage-1 provenance for the marginal-slope `z` column.
    ///
    /// When the marginal-slope `z` was generated by a transformation-normal
    /// Stage-1 fit, the orchestration layer fills this with the Stage-1 recipe.
    /// Its presence is the sole auto-enable signal for cross-fitted, Neyman-
    /// orthogonal score calibration (#461): the materializer cross-fits the CTN
    /// to produce out-of-fold `z` and the score-influence Jacobian `J`, replaces
    /// the raw `z` with `z_oof`, and absorbs `J` as a leakage-projection block in
    /// Stage-2. This is in-process plumbing only — there is no CLI flag, env var,
    /// or feature gate. `None` ⇒ raw `z` with the free-warp `score_warp`
    /// fallback. See [`CtnStage1Recipe`].
    pub ctn_stage1: Option<CtnStage1Recipe>,

    // Fitting options
    /// Whether to scale dimensions. NOTE(review): which dimensions (e.g.
    /// per-axis scaling of multi-dimensional smooths) is not visible here —
    /// confirm.
    pub scale_dimensions: bool,
    /// Enable exact spatial adaptive regularization for standard formula fits.
    /// `None` uses the quality-first automatic policy. The current automatic
    /// policy leaves LAREG off unless explicitly requested because the
    /// optimizer's REML-selected local weights can over-regularize small
    /// high-yield spatial signals.
    pub adaptive_regularization: Option<bool>,
    /// Ridge penalty strength.
    pub ridge_lambda: f64,

    /// Route the fit through the transformation-normal family. When set, the
    /// formula terms are treated as the covariate side of the transformation
    /// model and the response basis is built internally. Incompatible with
    /// `noise_formula` and with `Surv(...)` responses.
    pub transformation_normal: bool,

    /// Enable Firth bias reduction for standard single-parameter families.
    pub firth: bool,
    /// Optional cap on the REML/LAML outer smoothing-parameter iterations for
    /// standard formula fits. `None` uses the production default.
    pub outer_max_iter: Option<usize>,
    /// Optional wall-clock budget (seconds) for the outer smoothing search
    /// (gam#979). Threaded to the survival marginal-slope fit, whose constrained
    /// joint-Newton can fail to certify convergence and otherwise grind without
    /// bound; with this set the fit returns its best-so-far iterate (or a
    /// catchable error) within the budget instead of hanging. `None` keeps the
    /// generous built-in default for that path and is unbounded elsewhere.
    pub outer_wall_clock_budget_secs: Option<f64>,

    /// GPU backend selection policy. `Auto` uses supported device kernels for
    /// large workloads, `Off` pins execution to CPU kernels, and `Force` fails
    /// loudly when a requested GPU kernel has no compiled backend.
    pub gpu_policy: gam_gpu::GpuPolicy,
    /// Optional override of the [`gam_runtime::resource::ResourcePolicy`] used when
    /// planning spatial bases (TPS / Matern / Duchon) during term construction.
    /// When `None`, the default-library policy is used.
    pub resource_policy: Option<gam_runtime::resource::ResourcePolicy>,

    /// Optional per-group metadata supplied by the caller. Fitting ignores this
    /// field; saved-model builders pass it through so deployment consumers can
    /// recover group provenance.
    pub group_metadata: Option<BTreeMap<String, JsonValue>>,

    /// Optional user-defined coefficient groups with separate precision
    /// parameters. Group-local priors, including catalog-metadata-informed
    /// Gamma precision hyperpriors, are resolved during design setup.
    pub coefficient_groups: Vec<CoefficientGroupSpec>,

    /// Optional per-existing-penalty-block Gamma(shape, rate) precision
    /// hyperpriors keyed by penalty-block label. This is the
    /// catalog-metadata-informed-prior hook for models that do not need a new
    /// user-defined coefficient group.
    pub penalty_block_gamma_priors: Vec<(String, f64, f64)>,

    /// Python `gamfit.fit(..., latents={...})` configuration. This reaches
    /// the standard formula workflow as an owned latent-coordinate block:
    /// the named smooth's synthetic covariates are rebuilt from `t`, and
    /// joint REML optimizes `[rho, vec(t)]` through latent design hyper-dirs.
    pub latents: Option<JsonValue>,
    /// Python `gamfit.fit(..., penalties=[...])` analytic-penalty descriptors,
    /// validated against the declared latent-coordinate blocks before a
    /// standard latent fit starts.
    pub analytic_penalties: Option<JsonValue>,
    /// Formula-path latent topology selector descriptor. The selector itself
    /// fits candidates through the ordinary workflow; this slot lets callers
    /// request and validate that path from the same config registry.
    pub topology_auto_selector: Option<gam_solve::topology_selector::TopologyAutoSelector>,
    /// `gamfit.fit(..., smooths={...})` Python kwarg routed through the FFI
    /// bridge. JSON object keyed by formula symbol (single column name or
    /// comma-joined tuple) → smooth descriptor (`{"kind": "duchon",
    /// "centers": [[...], ...], ...}`). Applied as a post-processing step on
    /// the [`TermCollectionSpec`] produced by the formula DSL: each smooth
    /// term whose `feature_cols` match a registry key has its kind-specific
    /// tunables (centers, knots, kernel hyperparameters) overridden with the
    /// user-supplied values. The single canonical lowering path guarantees
    /// `smooths={"x": Duchon(centers=K)}` (integer) produces a bit-identical
    /// block spec to writing `duchon(x, centers=K)` in the formula; only
    /// explicit array-valued `centers=` differs, routing through
    /// `CenterStrategy::UserProvided` instead of `FarthestPoint`/`EqualMass`.
    pub smooth_overrides: Option<JsonValue>,
    /// Engage the cross-process ON-DISK persistent warm-start layer (#1082).
    ///
    /// Default `false`: only the always-on in-memory warm start runs, so a
    /// single fit and throwaway/replicate/CI-coverage loops pay zero disk I/O
    /// (no `WarmStartStore` dir/eviction scan, no record load/store). Set
    /// `true` to engage cross-process / repeat-fit resume: the flag threads
    /// `FitConfig → FitOptions → ExternalOptimOptions` down to the standard
    /// `RemlState`, which then calls `enable_persistent_warm_start_disk()`.
    pub persist_warm_start_disk: bool,
}
516
517impl Default for FitConfig {
518 fn default() -> Self {
519 Self {
520 family: None,
521 negative_binomial_theta: None,
522 link: None,
523 flexible_link: false,
524 offset_column: None,
525 noise_offset_column: None,
526 frailty: None,
527 baseline_target: "linear".into(),
528 baseline_scale: None,
529 baseline_shape: None,
530 baseline_rate: None,
531 baseline_makeham: None,
532 time_basis: "ispline".into(),
533 time_degree: 3,
534 time_num_internal_knots: 8,
535 time_smooth_lambda: 1e-2,
536 survival_likelihood: "location-scale".into(),
537 survival_distribution: "gaussian".into(),
538 threshold_time_k: None,
539 threshold_time_degree: 3,
540 sigma_time_k: None,
541 sigma_time_degree: 3,
542 noise_formula: None,
543 logslope_formula: None,
544 z_column: None,
545 weight_column: None,
546 expectile_tau: None,
547 ctn_stage1: None,
548 scale_dimensions: false,
549 adaptive_regularization: None,
550 ridge_lambda: 1e-6,
551 transformation_normal: false,
552 firth: false,
553 outer_max_iter: None,
554 outer_wall_clock_budget_secs: None,
555 gpu_policy: gam_gpu::GpuPolicy::Auto,
556 resource_policy: None,
557 group_metadata: None,
558 coefficient_groups: Vec::new(),
559 penalty_block_gamma_priors: Vec::new(),
560 latents: None,
561 analytic_penalties: None,
562 topology_auto_selector: None,
563 smooth_overrides: None,
564 persist_warm_start_disk: false,
565 }
566 }
567}
/// The result of materializing a formula + config against a dataset.
pub struct MaterializedModel<'a> {
    /// The fully specified fit request produced by materialization.
    pub request: FitRequest<'a>,
    /// Human-readable notes collected during materialization (presumably
    /// describing inferred/auto-detected choices — confirm against the
    /// materializer).
    pub inference_notes: Vec<String>,
}
/// Plain-vector inputs for a 1-D smoothing-spline fit with a Gaussian
/// response (presumably consumed by the scan that yields
/// `FitResult::SplineScan` — confirm against the caller).
// NOTE(review): `x`, `y`, and `w` presumably have equal length (one entry per
// training row); nothing in this declaration enforces it.
pub struct SplineScanInputs {
    /// Abscissae of the single 1-D smooth (training rows of its feature column).
    pub x: Vec<f64>,
    /// Gaussian response.
    pub y: Vec<f64>,
    /// Observation weights (variance is `σ²/w`).
    pub w: Vec<f64>,
    /// Smoothing-spline order `m = penalty_order ∈ {1, 2, 3}`: `m = 1` the
    /// random-walk/linear smoother (penalty `λ∫f′²`), `m = 2` the cubic
    /// smoother (penalty `λ∫f″²`), `m = 3` the quintic smoother (penalty
    /// `λ∫(f‴)²`).
    pub order: usize,
}
/// Plain-vector inputs for a single scattered 2-D or 3-D smooth with a
/// Gaussian response (presumably consumed by the residual-cascade fit that
/// yields `FitResult::ResidualCascade` — confirm against the caller).
// NOTE(review): each inner vector of `coords`, plus `y` and `w`, presumably
// has one entry per training row, and `metric` one entry per axis; nothing in
// this declaration enforces it.
pub struct ResidualCascadeInputs {
    /// One slice per coordinate axis (2 or 3) of the single scattered smooth.
    pub coords: Vec<Vec<f64>>,
    /// Gaussian response.
    pub y: Vec<f64>,
    /// Observation weights (variance is `σ²/w`).
    pub w: Vec<f64>,
    /// Per-axis positive metric scaling `diag(metric)` of `z = diag(metric)·x`.
    pub metric: Vec<f64>,
    /// Sobolev smoothness order `s` of the multilevel Wendland-(3,1) prior,
    /// clamped into the native-space window `(d/2, (d+3)/2]` (issue caveat 1).
    pub sobolev_s: f64,
}