gam_models/fit_orchestration/
entry.rs

1use super::*;
2
3/// Request-specific inputs to the canonical standard-fit `FitOptions`.
4///
5/// Everything in here varies per call (the link state extracted from the
6/// formula/config, the linear constraints synthesized from `bounded()` /
7/// shape-constrained terms, the Firth / adaptive-regularization toggles read
8/// off the `FitConfig`). Every *policy* field of `FitOptions` — the ones that
9/// decide HOW the outer REML optimization behaves (`compute_inference`,
10/// `skip_rho_posterior_inference`, `tol`, the `max_iter` default, the penalty
11/// shrinkage floor) — is filled in by [`canonical_standard_fit_options`] and is
12/// NOT settable here, so the CLI binary and the Python/PyO3 path cannot resolve
13/// a different optimization policy for the same model (#1196). Before this seam
14/// existed the CLI hand-built `FitOptions` with `tol: 1e-6` /
15/// `skip_rho_posterior_inference: false` while the formula path used
16/// `tol: 1e-10` / `skip_rho_posterior_inference: true`, so the identical model
17/// fit *differently* depending on which entry point you called it from — the
18/// exact class of divergence #1191 surfaced.
19#[derive(Default)]
20pub struct StandardFitOptionsInputs {
21    pub latent_cloglog: Option<LatentCLogLogState>,
22    pub mixture_link: Option<MixtureLinkSpec>,
23    pub optimize_mixture: bool,
24    pub sas_link: Option<SasLinkSpec>,
25    pub optimize_sas: bool,
26    pub linear_constraints: Option<gam_solve::pirls::LinearInequalityConstraints>,
27    pub firth_bias_reduction: bool,
28    pub adaptive_regularization: Option<AdaptiveRegularizationOptions>,
29    /// `Some` only when a caller (the forced-Firth CLI branch) overrides the
30    /// canonical default. `None` keeps the single-source default `Some(1e-6)`.
31    pub penalty_shrinkage_floor_override: Option<Option<f64>>,
32    pub persist_warm_start_disk: bool,
33}
34
35/// The single source of truth for standard-fit `FitOptions` *policy*.
36///
37/// Both standard-fit entry points — `materialize_standard` (the formula /
38/// Python / PyO3 path) and the `gam` CLI's `run_fit` — construct their
39/// `StandardFitRequest` options through this function, so the outer REML
40/// optimization policy (`compute_inference`, `skip_rho_posterior_inference`,
41/// `tol`, `max_iter` default, `penalty_shrinkage_floor`) is identical by
42/// construction. New policy fields must be set HERE, never re-derived at a call
43/// site, which is what makes Python/CLI behavioral divergence structurally
44/// impossible rather than enforced by parallel-but-equal code (#1196).
45pub fn canonical_standard_fit_options(
46    config: &FitConfig,
47    inputs: StandardFitOptionsInputs,
48) -> FitOptions {
49    FitOptions {
50        latent_cloglog: inputs.latent_cloglog,
51        mixture_link: inputs.mixture_link,
52        optimize_mixture: inputs.optimize_mixture,
53        sas_link: inputs.sas_link,
54        optimize_sas: inputs.optimize_sas,
55        // Posterior covariance is always computed so `predict --uncertainty`
56        // works for every family (the `COV_MAX_P` diagonal fallback caps cost).
57        compute_inference: true,
58        // Formula/CLI fits are the interactive/default path: keep coefficient
59        // covariance and the smoothing correction, but do not run the optional
60        // live-rho posterior certificate/escalation, which can launch NUTS over
61        // rho and turn ordinary fits into sampler benchmarks. Lower-level
62        // callers that explicitly need the rho posterior opt in elsewhere.
63        skip_rho_posterior_inference: true,
64        max_iter: config.outer_max_iter.unwrap_or(200),
65        // Outer REML/LAML smoothing-selection tolerance. `1e-10` (effective
66        // projected-gradient threshold ≈ 1e-7) resolves λ̂ to optimiser
67        // precision and restores the `w=c ⇔ c-fold replication` invariance in
68        // smoothing selection (gam#893). The CLI previously used the stale
69        // `1e-6`, which over-smoothed relative to the formula path.
70        tol: 1e-10,
71        nullspace_dims: vec![],
72        linear_constraints: inputs.linear_constraints,
73        firth_bias_reduction: inputs.firth_bias_reduction,
74        adaptive_regularization: inputs.adaptive_regularization,
75        penalty_shrinkage_floor: inputs
76            .penalty_shrinkage_floor_override
77            .unwrap_or(Some(1e-6)),
78        rho_prior: Default::default(),
79        kronecker_penalty_system: None,
80        kronecker_factored: None,
81        persist_warm_start_disk: inputs.persist_warm_start_disk,
82    }
83}
84
85pub fn fit_model(request: FitRequest<'_>) -> Result<FitResult, WorkflowError> {
86    // Disk warm-start persistence is opt-in. The always-on in-memory warm start
87    // remains inside the fit engines, but the workflow dispatcher must not open
88    // the shared WarmStartStore for ordinary formula fits: refit-heavy quality
89    // tests get no cross-process reuse and previously paid cache lookup,
90    // checkpoint, and eviction scans on every replicate (#1082/#1114).
91    let request = request;
92    // Each `fit_*_model` helper still returns `Result<_, String>` internally;
93    // the boundary conversion happens here so the public API returns
94    // `WorkflowError::IntegrationFailed` carrying the underlying solver text.
95    let wrap_solver_err =
96        |reason: String| -> WorkflowError { WorkflowError::IntegrationFailed { reason } };
97    match request {
98        FitRequest::Standard(request) => fit_standard_model(request)
99            .map(FitResult::Standard)
100            .map_err(wrap_solver_err),
101        FitRequest::GaussianLocationScale(request) => fit_gaussian_location_scale_model(request)
102            .map(FitResult::GaussianLocationScale)
103            .map_err(wrap_solver_err),
104        FitRequest::BinomialLocationScale(request) => fit_binomial_location_scale_model(request)
105            .map(FitResult::BinomialLocationScale)
106            .map_err(wrap_solver_err),
107        FitRequest::DispersionLocationScale(request) => {
108            fit_dispersion_location_scale_model(request)
109                .map(FitResult::DispersionLocationScale)
110                .map_err(wrap_solver_err)
111        }
112        FitRequest::SurvivalLocationScale(request) => fit_survival_location_scale_model(request)
113            .map(FitResult::SurvivalLocationScale)
114            .map_err(wrap_solver_err),
115        FitRequest::SurvivalTransformation(request) => fit_survival_transformation_model(request)
116            .map(FitResult::SurvivalTransformation)
117            .map_err(wrap_solver_err),
118        FitRequest::BernoulliMarginalSlope(request) => fit_bernoulli_marginal_slope_model(request)
119            .map(FitResult::BernoulliMarginalSlope)
120            .map_err(wrap_solver_err),
121        FitRequest::SurvivalMarginalSlope(request) => fit_survival_marginal_slope_model(request)
122            .map(FitResult::SurvivalMarginalSlope)
123            .map_err(wrap_solver_err),
124        FitRequest::LatentSurvival(request) => fit_latent_survival_model(request)
125            .map(FitResult::LatentSurvival)
126            .map_err(wrap_solver_err),
127        FitRequest::LatentBinary(request) => fit_latent_binary_model(request)
128            .map(FitResult::LatentBinary)
129            .map_err(wrap_solver_err),
130        FitRequest::TransformationNormal(request) => fit_transformation_normal_model(request)
131            .map(FitResult::TransformationNormal)
132            .map_err(wrap_solver_err),
133    }
134}
135/// Resolve the [`gam_runtime::resource::ResourcePolicy`] backing term construction
136/// for a given [`FitConfig`] + dataset.
137///
138/// If the caller hasn't supplied an explicit policy override, derive one from
139/// the shape of the problem via
140/// [`gam_runtime::resource::ResourcePolicy::for_problem`]. At large scale (n_rows
141/// >= 100k or the marginal-slope large-scale path active) this returns
142/// > `analytic_operator_required` so that any silent dense materialization in
143/// > the term-construction layer fails fast rather than allocating tens of GiB;
144/// > at small scale it falls through to the permissive default-library policy
145/// > so that non-operator bases still build cleanly.
146///
147/// `p_estimate = 0` because the per-block coefficient count isn't known until
148/// the spec has been built; the n_rows and hints triggers are sufficient to
149/// flip strict mode for every shape that needs it.
150pub(crate) fn resolved_resource_policy(
151    config: &FitConfig,
152    data: &Dataset,
153    hints: gam_runtime::resource::ProblemHints,
154) -> gam_runtime::resource::ResourcePolicy {
155    if let Some(p) = config.resource_policy.clone() {
156        return p;
157    }
158    gam_runtime::resource::ResourcePolicy::for_problem(data.values.nrows(), 0, hints)
159}
160
161pub(crate) fn marginal_slope_hints(config: &FitConfig) -> gam_runtime::resource::ProblemHints {
162    gam_runtime::resource::ProblemHints {
163        marginal_slope_large_scale_active: config.logslope_formula.is_some()
164            || config.z_column.is_some(),
165    }
166}
167/// Parse, materialize, and fit a model in one call.
168/// Resolve the expectile asymmetry `τ` requested by `config`, if any.
169///
170/// Returns `Ok(Some(τ))` when `config.family` is `"expectile"` (optionally with
171/// an inline asymmetry, `"expectile(0.9)"`), `Ok(None)` for every other family,
172/// and `Err` when an expectile request carries an out-of-range `τ`. The inline
173/// form takes precedence over the explicit [`FitConfig::expectile_tau`] field
174/// only when both are present and disagree is rejected as a contradiction; when
175/// neither pins `τ`, the median expectile `τ = 0.5` (the ordinary mean fit) is
176/// the default.
177fn expectile_tau_for_config(config: &FitConfig) -> Result<Option<f64>, WorkflowError> {
178    let Some(raw) = config.family.as_deref() else {
179        return Ok(None);
180    };
181    let trimmed = raw.trim();
182    let lower = trimmed.to_ascii_lowercase();
183    if !(lower == "expectile" || lower.starts_with("expectile(")) {
184        return Ok(None);
185    }
186    let invalid = |reason: String| WorkflowError::InvalidConfig { reason };
187    // Optional inline asymmetry: `expectile(0.9)`.
188    let inline_tau = if let Some(rest) = lower.strip_prefix("expectile(") {
189        let inner = rest.strip_suffix(')').ok_or_else(|| {
190            invalid(format!(
191                "expectile family asymmetry must be written as `expectile(τ)`; got `{trimmed}`"
192            ))
193        })?;
194        let value: f64 = inner.trim().parse().map_err(|_| {
195            invalid(format!(
196                "expectile asymmetry `{}` is not a finite number",
197                inner.trim()
198            ))
199        })?;
200        Some(value)
201    } else {
202        None
203    };
204    let tau = match (inline_tau, config.expectile_tau) {
205        (Some(a), Some(b)) if (a - b).abs() > 0.0 => {
206            return Err(invalid(format!(
207                "expectile asymmetry given both inline (`expectile({a})`) and via expectile_tau \
208                 ({b}); supply exactly one"
209            )));
210        }
211        (Some(a), _) => a,
212        (None, Some(b)) => b,
213        (None, None) => 0.5,
214    };
215    if !(tau.is_finite() && tau > 0.0 && tau < 1.0) {
216        return Err(invalid(format!(
217            "expectile asymmetry τ must be finite and strictly in (0, 1); got {tau}"
218        )));
219    }
220    Ok(Some(tau))
221}
222
223/// Per-row asymmetric LAWS weight `wᵢ(τ) = τ` if `yᵢ > μᵢ` else `1 − τ`, scaled
224/// by the base prior weight. At the boundary `yᵢ = μᵢ` the two half-weights
225/// agree in the limit only at `τ = 0.5`; the convention `yᵢ > μᵢ ⇒ τ` (strict)
226/// matches Newey–Powell's lower-closed asymmetric loss and is what `expectreg`
227/// uses. The fixed point is independent of the tie convention because ties form
228/// a measure-zero set under any continuous response.
229fn expectile_row_weights(
230    y: ArrayView1<f64>,
231    mu: ArrayView1<f64>,
232    base: ArrayView1<f64>,
233    tau: f64,
234) -> Array1<f64> {
235    Array1::from_shape_fn(y.len(), |i| {
236        let asym = if y[i] > mu[i] { tau } else { 1.0 - tau };
237        base[i] * asym
238    })
239}
240
241fn constant_gaussian_standard_fit(
242    request: &StandardFitRequest<'_>,
243) -> Result<StandardFitResult, WorkflowError> {
244    if !request.family.is_gaussian_identity() || request.y.is_empty() {
245        return Err(WorkflowError::InvalidConfig {
246            reason: "constant Gaussian shortcut requires a non-empty Gaussian identity request"
247                .to_string(),
248        });
249    }
250    if request.y.iter().any(|value| !value.is_finite())
251        || request.offset.iter().any(|value| !value.is_finite())
252        || request
253            .weights
254            .iter()
255            .any(|value| !value.is_finite() || *value < 0.0)
256    {
257        return Err(WorkflowError::InvalidConfig {
258            reason: "constant Gaussian shortcut requires finite response, offset, and non-negative weights"
259                .to_string(),
260        });
261    }
262    let weight_sum = request.weights.sum();
263    if !(weight_sum.is_finite() && weight_sum > 0.0) {
264        return Err(WorkflowError::InvalidConfig {
265            reason: "constant Gaussian shortcut requires positive total weight".to_string(),
266        });
267    }
268    let mut centered_sum = 0.0_f64;
269    for i in 0..request.y.len() {
270        centered_sum += request.weights[i] * (request.y[i] - request.offset[i]);
271    }
272    let intercept = centered_sum / weight_sum;
273    let design =
274        build_term_collection_design(request.data.view(), &request.spec).map_err(|err| {
275            WorkflowError::InvalidConfig {
276                reason: format!("constant Gaussian shortcut could not rebuild design: {err}"),
277            }
278        })?;
279    let p = design.design.ncols();
280    let mut beta = Array1::<f64>::zeros(p);
281    for col in design.intercept_range.clone() {
282        if col < p {
283            beta[col] = intercept;
284        }
285    }
286    let lambdas = Array1::<f64>::ones(design.penalties.len());
287    let log_lambdas = Array1::<f64>::zeros(design.penalties.len());
288    let fit =
289        gam_solve::estimate::UnifiedFitResult::try_from_parts(gam_solve::estimate::UnifiedFitResultParts {
290            blocks: vec![gam_solve::estimate::FittedBlock {
291                beta: beta.clone(),
292                role: gam_problem::BlockRole::Mean,
293                edf: design.intercept_range.len() as f64,
294                lambdas: lambdas.clone(),
295            }],
296            log_lambdas,
297            lambdas,
298            likelihood_family: Some(request.family.clone()),
299            likelihood_scale: gam_problem::LikelihoodScaleMetadata::ProfiledGaussian,
300            log_likelihood_normalization: gam_problem::LogLikelihoodNormalization::UserProvided,
301            log_likelihood: 0.0,
302            deviance: 0.0,
303            reml_score: 0.0,
304            stable_penalty_term: 0.0,
305            penalized_objective: 0.0,
306            used_device: false,
307            outer_iterations: 0,
308            outer_converged: true,
309            outer_gradient_norm: Some(0.0),
310            standard_deviation: 0.0,
311            covariance_conditional: None,
312            covariance_corrected: None,
313            inference: None,
314            fitted_link: gam_solve::estimate::FittedLinkState::Standard(None),
315            geometry: None,
316            block_states: Vec::new(),
317            pirls_status: gam_solve::pirls::PirlsStatus::Converged,
318            max_abs_eta: intercept.abs(),
319            constraint_kkt: None,
320            artifacts: gam_solve::estimate::FitArtifacts {
321                pirls: None,
322                ..Default::default()
323            },
324            inner_cycles: 0,
325        })
326        .map_err(|err| WorkflowError::IntegrationFailed {
327            reason: format!("constant Gaussian shortcut produced invalid fit: {err}"),
328        })?;
329    let resolvedspec =
330        freeze_term_collection_from_design(&request.spec, &design).map_err(|err| {
331            WorkflowError::InvalidConfig {
332                reason: format!("constant Gaussian shortcut could not freeze design: {err}"),
333            }
334        })?;
335    Ok(StandardFitResult {
336        fit,
337        design,
338        resolvedspec,
339        adaptive_diagnostics: None,
340        kappa_timing: None,
341        saved_link_state: gam_solve::estimate::FittedLinkState::Standard(None),
342        wiggle_knots: None,
343        wiggle_degree: None,
344    })
345}
346
347fn gaussian_response_is_constant(request: &StandardFitRequest<'_>) -> bool {
348    if !request.family.is_gaussian_identity()
349        || request.y.is_empty()
350        || request.y.iter().any(|value| !value.is_finite())
351    {
352        return false;
353    }
354    let (lo, hi) = request
355        .y
356        .iter()
357        .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), &value| {
358            (lo.min(value), hi.max(value))
359        });
360    (hi - lo).abs() <= 1.0e-12 * hi.abs().max(1.0)
361}
362
363pub fn fit_from_formula(
364    formula: &str,
365    data: &Dataset,
366    config: &FitConfig,
367) -> Result<FitResult, WorkflowError> {
368    // Expectile regression (Newey–Powell asymmetric least squares): when the
369    // family resolves to "expectile", the τ-expectile of `y | x` is the
370    // minimizer of `Σ wᵢ(τ)·(yᵢ − μᵢ)²`, `wᵢ(τ) = τ` if `yᵢ > μᵢ` else `1 − τ`
371    // — the smooth analogue of the τ-quantile. The minimizer is a Least
372    // Asymmetrically Weighted Squares (LAWS) fixed point: iterate the penalized
373    // Gaussian-identity GAM with `wᵢ(τ)` recomputed from the current `μᵢ` until
374    // the residual-sign pattern stabilizes. REML λ-selection runs inside each
375    // inner Gaussian solve, so every gam smooth/tensor/spatial basis becomes a
376    // penalized expectile smooth with data-driven smoothing for free. This is a
377    // genuine estimator route, not a silent swap: it fires only on the explicit
378    // `family = "expectile"`. Every other family falls through unchanged.
379    if let Some(tau) = expectile_tau_for_config(config)? {
380        return fit_expectile_laws(formula, data, config, tau);
381    }
382    let mat = materialize(formula, data, config)?;
383    // Exact O(n) spline-scan fast path (#1030): when the materialized request
384    // is the single 1-D Gaussian-identity penalized-smooth shape the
385    // state-space scan solves exactly, route through it and return the
386    // scan-bearing model directly — the same penalized posterior at O(n) per
387    // λ-trial instead of the dense design/Gram route. Detection is structural
388    // and conservative (see `spline_scan_fast_path`); every other shape falls
389    // through to the dense `fit_model` path unchanged. Mirrors the CLI
390    // (main.rs run_fit) and FFI consumers, which build the persistence payload
391    // from this same `SplineScanFit`.
392    if let FitRequest::Standard(request) = &mat.request {
393        if gaussian_response_is_constant(request) {
394            return constant_gaussian_standard_fit(request).map(FitResult::Standard);
395        }
396        if let Some(inputs) = spline_scan_fast_path(request) {
397            let scan = gam_solve::spline_scan::fit_spline_scan(
398                &inputs.x,
399                &inputs.y,
400                &inputs.w,
401                inputs.order,
402            )
403            .map_err(|reason| WorkflowError::IntegrationFailed { reason })?;
404            return Ok(FitResult::SplineScan(scan));
405        }
406        // O(n log n) multiresolution residual-cascade fast path (#1032): a
407        // scattered low-d Gaussian-identity Duchon/Matérn smooth past the
408        // dense-kernel cliff. UNLIKE the scan, the cascade is a DIFFERENT
409        // posterior from the dense radial term, so it only ever fires as an
410        // explicit alternative estimator on the exact structural signature
411        // (`residual_cascade_fast_path`) AND when the in-cascade quasi-uniformity
412        // guard certifies the metric — a rejected metric or any ineligible shape
413        // falls through to the dense `fit_model` path (a genuine estimator
414        // choice, never a silent swap). The save paths build the persistence
415        // payload from this `ResidualCascadeFit`'s `to_state` snapshot.
416        if let Some(inputs) = residual_cascade_fast_path(request) {
417            let coord_refs: Vec<&[f64]> = inputs.coords.iter().map(Vec::as_slice).collect();
418            if let Ok(fit) = gam_solve::residual_cascade::fit_residual_cascade(
419                &coord_refs,
420                &inputs.y,
421                &inputs.w,
422                &inputs.metric,
423                inputs.sobolev_s,
424            ) {
425                return Ok(FitResult::ResidualCascade(fit));
426            }
427            // The quasi-uniformity guard (caveat 2) or any degenerate-design
428            // signal surfaces as a build/solve error; fall through to the dense
429            // kernel path rather than failing the fit outright.
430        }
431    }
432    // `fit_model` already returns `WorkflowError` end-to-end; propagate it
433    // directly instead of stringifying then re-wrapping.
434    fit_model(mat.request)
435}
436
437/// Least Asymmetrically Weighted Squares (LAWS) driver for expectile GAMs.
438///
439/// The τ-expectile surface minimizes `Σ wᵢ(τ)·(yᵢ − μᵢ)²` with the residual-
440/// sign asymmetric weight `wᵢ(τ)`. Because that weight is piecewise-constant in
441/// `sign(yᵢ − μᵢ)`, the objective is the supremum of a finite family of
442/// weighted least-squares problems and its minimizer is the unique fixed point
443/// of: *solve the penalized WLS with weights frozen at the current sign
444/// pattern, then recompute the sign pattern from the new fit*. The asymmetric
445/// loss is strictly convex (weights bounded in `[min(τ,1−τ), max(τ,1−τ)] > 0`),
446/// so this monotone-descent iteration converges, and since the sign pattern
447/// takes finitely many values it stabilizes in finitely many steps (Schnabel &
448/// Eilers 2009; the same Newton/IRLS-for-expectiles `expectreg` runs).
449///
450/// Each inner solve is the FULL standard Gaussian-identity GAM: any basis,
451/// tensor, spatial smooth, by-variable, random effect, plus REML λ-selection on
452/// the current asymmetric weights. The returned fit is an ordinary
453/// [`FitResult::Standard`] whose coefficients ARE the penalized τ-expectile —
454/// every downstream consumer (predict, posterior bands, persistence) works
455/// unchanged. The reported scale is the asymmetric working variance, so
456/// expectile standard errors are the sandwich-free Gaussian-form bands of the
457/// converged weighted problem (a deliberate first-rung choice; see #1100).
458fn fit_expectile_laws(
459    formula: &str,
460    data: &Dataset,
461    config: &FitConfig,
462    tau: f64,
463) -> Result<FitResult, WorkflowError> {
464    use gam_linalg::matrix::LinearOperator;
465
466    // Inner fits are ordinary Gaussian-identity GAMs; the τ asymmetry lives
467    // entirely in the per-iteration prior weights this driver injects.
468    let gaussian_config = FitConfig {
469        family: Some("gaussian".to_string()),
470        link: Some("identity".to_string()),
471        expectile_tau: None,
472        ..config.clone()
473    };
474
475    // Materialize once to capture the fixed training design, response, offset,
476    // and base prior weights. The design (basis, penalties, identifiability
477    // transforms) does not depend on the prior weights, so it is reused across
478    // every LAWS iteration; only the weight vector and the resulting β change.
479    let base_mat = materialize(formula, data, &gaussian_config)?;
480    let FitRequest::Standard(base_request) = base_mat.request else {
481        return Err(WorkflowError::InvalidConfig {
482            reason: "expectile regression is only defined for standard (non-survival, \
483                     non-location-scale) responses"
484                .to_string(),
485        });
486    };
487    let StandardFitRequest {
488        data: design_data,
489        y,
490        weights: base_weights,
491        offset,
492        spec,
493        family: materialized_family,
494        options,
495        kappa_options,
496        wiggle,
497        coefficient_groups,
498        penalty_block_gamma_priors,
499        latent_coord,
500        _marker,
501    } = base_request;
502    // The materializer already resolved the inner family to Gaussian-identity
503    // from `gaussian_config`; assert it so a future materializer change that
504    // silently picked a different family for `"gaussian"` is caught here rather
505    // than producing a non-expectile fit.
506    if !materialized_family.is_gaussian_identity() {
507        return Err(WorkflowError::InvalidConfig {
508            reason: format!(
509                "expectile LAWS requires a Gaussian-identity inner family; materializer produced {}",
510                materialized_family.name()
511            ),
512        });
513    }
514
515    if wiggle.is_some() || latent_coord.is_some() {
516        return Err(WorkflowError::InvalidConfig {
517            reason: "expectile regression does not support flexible-link wiggle or latent \
518                     coordinates"
519                .to_string(),
520        });
521    }
522
523    let n = y.len();
524    let gaussian_family = LikelihoodSpec::gaussian_identity();
525    // Cold start: τ = 0.5 (symmetric) weights ⇒ the first inner fit is the OLS
526    // mean GAM, the natural warm start for any τ.
527    let mut weights = base_weights.clone();
528    let mut last_sign: Option<Vec<bool>> = None;
529    let mut last_result: Option<StandardFitResult> = None;
530
531    // The sign pattern has 2ⁿ values but LAWS visits a monotone-descent subset;
532    // empirically a handful of iterations suffice. The cap is a safety guard:
533    // on the rare oscillation between two equal-objective sign patterns (only
534    // possible when rows sit exactly on the fitted surface) the last fit is a
535    // valid τ-expectile of the perturbation-stable problem, so returning it is
536    // correct rather than an error.
537    const MAX_LAWS_ITERS: usize = 50;
538
539    for _iter in 0..MAX_LAWS_ITERS {
540        let request = StandardFitRequest {
541            data: design_data.clone(),
542            y: y.clone(),
543            weights: weights.clone(),
544            offset: offset.clone(),
545            spec: spec.clone(),
546            family: gaussian_family.clone(),
547            options: options.clone(),
548            kappa_options: kappa_options.clone(),
549            wiggle: None,
550            coefficient_groups: coefficient_groups.clone(),
551            penalty_block_gamma_priors: penalty_block_gamma_priors.clone(),
552            latent_coord: None,
553            _marker,
554        };
555        let result = fit_standard_model(request)
556            .map_err(|reason| WorkflowError::IntegrationFailed { reason })?;
557
558        // Training-scale fitted mean μ = X·β (identity link, zero-checked
559        // offset folded by the design path). The design columns match the
560        // combined coefficient vector exactly (the same contract `predict`
561        // and the safety tests rely on).
562        let mu = result.design.design.apply(&result.fit.beta);
563        if mu.len() != n {
564            return Err(WorkflowError::IntegrationFailed {
565                reason: format!(
566                    "expectile LAWS: fitted mean length {} disagrees with response length {n}",
567                    mu.len()
568                ),
569            });
570        }
571        let mut mu_off = mu;
572        mu_off += &offset;
573
574        let sign: Vec<bool> = (0..n).map(|i| y[i] > mu_off[i]).collect();
575        let converged = last_sign.as_ref().is_some_and(|prev| prev == &sign);
576        weights = expectile_row_weights(y.view(), mu_off.view(), base_weights.view(), tau);
577        last_sign = Some(sign);
578        last_result = Some(result);
579        if converged {
580            break;
581        }
582    }
583
584    let result = last_result.ok_or_else(|| WorkflowError::IntegrationFailed {
585        reason: "expectile LAWS produced no fit".to_string(),
586    })?;
587    Ok(FitResult::Standard(result))
588}
589/// Detection seam for the exact O(n) cubic-smoothing-spline fast path.
590///
591/// This is the EARLIEST point in the standard workflow where a materialized
592/// fit request carries everything needed to prove the model is exactly the
593/// problem the scan solves: a Gaussian likelihood with identity link over
594/// `intercept + one 1-D cubic-class penalized smooth` — i.e. the penalized
595/// least-squares problem `min Σ w_i (y_i − f(x_i))² + λ∫f″²` with an
596/// unpenalized `{1, x}` null space. The Kalman/RTS scan computes that
597/// posterior (mean, pointwise variance, exact diffuse REML for λ) in O(n) per
598/// λ-trial instead of the dense design/Gram O(n·k²) + O(k³) route.
599///
600/// Returns `Some` only when ALL of the following hold; everything else falls
601/// through to the dense path:
602/// - family is Gaussian + identity link;
603/// - no link wiggle, no latent coordinates, no coefficient groups, no penalty
604///   hyperpriors, no linear/box constraints, no Firth, no adaptive
605///   regularization, no Kronecker systems, no externally injected null-space
606///   dims;
607/// - the term collection is exactly one smooth term — no linear terms, no
608///   random effects, no by-variables / factor interactions;
609/// - that smooth is a plain 1-D B-spline whose penalty order is compatible
610///   with the exact scan and whose null space is unshrunk
611///   (`double_penalty=false`). `double_penalty` (mgcv `select = TRUE`) on a free
612///   B-spline emits a second REML coordinate — the Marra & Wood (2011) null-space
613///   shrinkage block — that the scan cannot represent (its polynomial null space
614///   is an improper diffuse prior it can never shrink); routing such a fit
615///   through the scan would silently drop that penalty and select λ from the
616///   bending penalty alone, which is exactly the EDF inflation #1266 reports.
617///   Those fits fall through to the dense two-rho path, which owns both penalties
618///   jointly;
619/// - the offset is identically zero and every weight is finite and positive;
620/// - at least 3 distinct finite abscissae (the scan's diffuse rank plus one).
621///
622/// λ-mapping note: the scan's penalty is exactly `λ∫f″²` (state-space
623/// `q = 1/λ` at unit σ²). The dense 1-D B-spline path penalizes the same
624/// cubic class through a reduced-rank discrete-difference Gram whose
625/// normalization differs by a basis-dependent constant, so a λ selected by
626/// one parameterization does not transfer numerically to the other. The scan
627/// therefore always re-selects λ by its own exact diffuse REML criterion
628/// (the optimizer of the same restricted likelihood, expressed in the scan's
629/// parameterization); user-pinned smoothing parameters are not representable
630/// at this seam (the formula DSL exposes none for this term class), so no
631/// pinned-λ mapping arises.
632///
633/// Identifiability transforms on the smooth (centering / linear-trend
634/// removal / orthogonality-to-intercept) are accepted as eligible: they only
635/// re-coordinate the unpenalized null space against the implicit intercept
636/// and do not change the fitted posterior of `E[y|x]`, which is what the
637/// scan returns directly.
638pub fn spline_scan_fast_path(request: &StandardFitRequest<'_>) -> Option<SplineScanInputs> {
639    if !request.family.is_gaussian_identity() {
640        return None;
641    }
642    if request.wiggle.is_some()
643        || request.latent_coord.is_some()
644        || !request.coefficient_groups.is_empty()
645        || !request.penalty_block_gamma_priors.is_empty()
646    {
647        return None;
648    }
649    let options = &request.options;
650    if options.latent_cloglog.is_some()
651        || options.mixture_link.is_some()
652        || options.sas_link.is_some()
653        || options.linear_constraints.is_some()
654        || options.adaptive_regularization.is_some()
655        || options.kronecker_penalty_system.is_some()
656        || options.kronecker_factored.is_some()
657        || options.firth_bias_reduction
658        || !options.nullspace_dims.is_empty()
659    {
660        return None;
661    }
662    let spec = &request.spec;
663    if !spec.linear_terms.is_empty()
664        || !spec.random_effect_terms.is_empty()
665        || spec.smooth_terms.len() != 1
666    {
667        return None;
668    }
669    let term = &spec.smooth_terms[0];
670    if !matches!(term.shape, gam_terms::smooth::ShapeConstraint::None)
671        || term.joint_null_rotation.is_some()
672    {
673        return None;
674    }
675    let gam_terms::smooth::SmoothBasisSpec::BSpline1D {
676        feature_col,
677        spec: bspec,
678    } = &term.basis
679    else {
680        return None;
681    };
682    // Smoothing-spline order m = penalty_order ∈ {1, 2, 3}. The exact scan
683    // integrates the order-m integrated-Wiener prior whose natural spline has
684    // degree 2m−1 (m=1 → linear, m=2 → cubic, m=3 → quintic), so require that
685    // degree to match user intent. The de Jong exact diffuse leading-block
686    // smoother (#1044) handles the m−1 partially-diffuse leading nodes for all
687    // m ≤ MAX_ORDER; m > MAX_ORDER falls through to the dense path.
688    let order = bspec.penalty_order;
689    // Double-penalty (mgcv `select = TRUE`) is NOT representable by the scan and
690    // must fall through to the dense two-rho path (#1266). On a free B-spline the
691    // double penalty emits a *second* REML coordinate — the Marra & Wood (2011)
692    // null-space shrinkage block `Z Zᵀ` (see `bspline_penalty_candidates`) —
693    // whose entire purpose is to let REML shrink the unpenalized `{1, x, …}`
694    // polynomial null space toward `EDF → 0` for an unsupported term. The scan,
695    // by construction, carries that null space as an *improper diffuse* prior it
696    // can never shrink (its EDF floor is the null-space dimension `order`), so
697    // routing a `double_penalty` fit through it silently DROPS the second penalty
698    // and selects λ from the single bending penalty alone. The scan's own exact
699    // diffuse REML then genuinely prefers a mildly wiggly fit at finite λ for
700    // some noise realizations (an interior REML optimum, EDF ≈ 3–4), which is the
701    // EDF inflation #1266 reports. The dense path owns both penalties jointly and
702    // its outer REML, seeded into the over-smoothing basin, drives the null space
703    // out (EDF → null-space dim) when the data are truly polynomial. Excluding
704    // `double_penalty` here keeps such a fit on the dense path; single-penalty
705    // and boundary-conditioned single-penalty B-splines keep the exact O(n) scan.
706    if !(1..=3).contains(&order)
707        || bspec.degree != 2 * order - 1
708        || bspec.double_penalty
709        || !bspec.boundary_conditions.is_free()
710        || !matches!(bspec.boundary, gam_terms::basis::OneDimensionalBoundary::Open)
711        || matches!(
712            bspec.knotspec,
713            gam_terms::basis::BSplineKnotSpec::PeriodicUniform { .. }
714        )
715    {
716        return None;
717    }
718    if request.offset.iter().any(|&v| v != 0.0) {
719        return None;
720    }
721    if request.weights.iter().any(|&v| !(v.is_finite() && v > 0.0)) {
722        return None;
723    }
724    if *feature_col >= request.data.ncols() || request.y.len() != request.data.nrows() {
725        return None;
726    }
727    let x: Vec<f64> = request.data.column(*feature_col).iter().copied().collect();
728    let y: Vec<f64> = request.y.iter().copied().collect();
729    let w: Vec<f64> = request.weights.iter().copied().collect();
730    if x.iter().any(|v| !v.is_finite()) || y.iter().any(|v| !v.is_finite()) {
731        return None;
732    }
733    // The diffuse polynomial null space consumes `order` innovations; the scan
734    // needs at least one proper innovation beyond them to profile σ².
735    let mut sorted = x.clone();
736    sorted.sort_by(f64::total_cmp);
737    sorted.dedup();
738    if sorted.len() < order + 1 {
739        return None;
740    }
741    Some(SplineScanInputs { x, y, w, order })
742}
743
744/// Formula-level entry for the exact O(n) cubic-smoothing-spline fast path.
745///
746/// Materializes the formula exactly like [`fit_from_formula`], then runs the
747/// [`spline_scan_fast_path`] detection on the resulting standard request.
748/// When detection fires the fit is routed through
749/// [`gam_solve::spline_scan::fit_spline_scan`] — the exact diffuse
750/// REML Kalman/RTS scan — and the full in-memory posterior
751/// ([`gam_solve::spline_scan::SplineScanFit`]: knots, smoothed
752/// states, pointwise variances, lag-one gains, σ², log λ, exact EDF, and an
753/// exact `predict`) is returned. `Ok(None)` means the model is not the
754/// scan-eligible shape and the caller should use the dense
755/// [`fit_from_formula`] path; this keeps every persistence-bearing consumer
756/// (model save, CLI, FFI) transparently on the dense fit, whose saved payload
757/// the scan does not yet have a schema for.
758pub fn fit_spline_scan_from_formula(
759    formula: &str,
760    data: &Dataset,
761    config: &FitConfig,
762) -> Result<Option<gam_solve::spline_scan::SplineScanFit>, WorkflowError> {
763    let mat = materialize(formula, data, config)?;
764    let FitRequest::Standard(request) = mat.request else {
765        return Ok(None);
766    };
767    let Some(inputs) = spline_scan_fast_path(&request) else {
768        return Ok(None);
769    };
770    gam_solve::spline_scan::fit_spline_scan(&inputs.x, &inputs.y, &inputs.w, inputs.order)
771        .map(Some)
772        .map_err(|reason| WorkflowError::IntegrationFailed { reason })
773}
774
775/// #1464 diagnostic entry point: evaluate the EXACT production fixed-κ
776/// profiled-REML criterion (`fixed_kappa_profiled_reml_score`, the same one the
777/// joint-fit κ-sign scan uses) at a list of pinned κ values for the first
778/// constant-curvature term of `formula`, materialised from `data`/`config`
779/// exactly like [`fit_from_formula`]. Returns `(κ, V_p(κ))` pairs.
780///
781/// This settles solver-vs-criterion for the railing bug: if `V_p(+κ) < V_p(−κ)`
782/// for a genuinely HYPERBOLIC dataset, the criterion itself prefers the collapsed
783/// +κ corner — the bug is in the constant-curvature REML/Occam term, not the
784/// optimiser. If `V_p(−κ) < V_p(+κ)` yet the full fit still returns +κ, the bug
785/// is in the solver/readback. The profiled fit pins κ and profiles only ρ
786/// (κ-optimisation disabled), so each returned score is the negative-log-evidence
787/// the outer loop minimises.
788pub fn constant_curvature_profiled_reml_scores(
789    formula: &str,
790    data: &Dataset,
791    config: &FitConfig,
792    kappas: &[f64],
793) -> Result<Vec<(f64, f64)>, WorkflowError> {
794    let mat = materialize(formula, data, config)?;
795    let FitRequest::Standard(request) = mat.request else {
796        return Err(WorkflowError::IntegrationFailed {
797            reason: "constant_curvature_profiled_reml_scores: formula did not materialise to a \
798                     standard fit request"
799                .to_string(),
800        });
801    };
802    let term_idx = *crate::fit_orchestration::drivers::constant_curvature_term_indices(&request.spec)
803        .first()
804        .ok_or_else(|| WorkflowError::IntegrationFailed {
805            reason: "constant_curvature_profiled_reml_scores: formula has no constant-curvature \
806                     curv() term"
807                .to_string(),
808        })?;
809    let mut out = Vec::with_capacity(kappas.len());
810    for &kappa in kappas {
811        let score = crate::fit_orchestration::drivers::fixed_kappa_profiled_reml_score(
812            request.data.view(),
813            request.y.view(),
814            request.weights.view(),
815            request.offset.view(),
816            &request.spec,
817            term_idx,
818            kappa,
819            request.family.clone(),
820            &request.options,
821        )
822        .map_err(|e| WorkflowError::IntegrationFailed {
823            reason: format!(
824                "constant_curvature_profiled_reml_scores: fixed-κ fit at κ={kappa} failed: {e}"
825            ),
826        })?;
827        out.push((kappa, score));
828    }
829    Ok(out)
830}
831
832/// Derived dense-kernel cliff: the cascade auto-route fires only once the dense
833/// radial basis the smooth would otherwise use has SATURATED at its center cap
834/// (`default_num_centers == K_MAX`), so the dense `O(n·K² + K³)` kernel solve
835/// can no longer grow resolution with `n` and the streaming cascade's
836/// `O(n·polylog)` is the only path that keeps improving. This is the structural
837/// "past the dense-kernel cliff" condition the issue names — derived from the
838/// dense sizing rule, NOT a magic n constant or a user flag.
839fn past_dense_kernel_cliff(n: usize, d: usize) -> bool {
840    // `default_num_centers` clamps to K_MAX = 2000; equality means the dense
841    // basis is pinned at the cap and cannot densify further with n.
842    const DENSE_CENTER_CAP: usize = 2000;
843    gam_terms::basis::default_num_centers(n, d) >= DENSE_CENTER_CAP
844}
845
846/// Map a Duchon/Matérn smoothness order onto the cascade's Sobolev order,
847/// clamped into the Wendland-(3,1) native window `(d/2, (d+3)/2]` (issue
848/// caveat 1: the multilevel frame can only represent up to `H^{(d+3)/2}`).
849fn cascade_sobolev_order(requested: f64, d: usize) -> f64 {
850    let lo = d as f64 / 2.0;
851    let hi = (d as f64 + 3.0) / 2.0;
852    // Nudge strictly inside the open lower bound when the request lands on it.
853    let eps = 1e-6 * (hi - lo);
854    requested.clamp(lo + eps, hi)
855}
856
857/// Detection seam for the O(n log n) multiresolution residual-cascade fast path
858/// (issue #1032).
859///
860/// This mirrors [`spline_scan_fast_path`] in shape but carries one CRITICAL
861/// difference dictated by the issue: the cascade is **not** the same posterior
862/// as the Duchon/Matérn term it stands in for (a different finite basis — the
863/// multilevel Wendland frame, not the reduced-rank radial kernel). So unlike
864/// the 1-D scan, which silently swaps an identical posterior, this path must
865/// only fire as an explicit alternative estimator on the structural signature
866/// the issue names, never as a transparent replacement. It returns `Some` only
867/// when ALL of the following hold:
868/// - family is Gaussian + identity link (the scattered low-d smooth the
869///   cascade solves);
870/// - none of the exotic-link / constraint / Firth / Kronecker / coefficient-
871///   group / hyperprior machinery is engaged;
872/// - the model is exactly one smooth term — no linear terms, no random
873///   effects, no by-variables;
874/// - that smooth is a scattered radial spatial smooth (`Duchon` or `Matern`)
875///   over `d ∈ {2, 3}` coordinates with no shape constraint;
876/// - the offset is identically zero and every weight is finite and positive;
877/// - `n` is past the derived dense-kernel cliff
878///   ([`past_dense_kernel_cliff`]) — below it the dense radial path is both
879///   exact-posterior and cheap, so there is no reason to change estimators.
880///
881/// The returned [`ResidualCascadeInputs`] carry a unit per-axis metric (the
882/// spec's isotropic radial distance); the quasi-uniformity guard inside
883/// [`gam_solve::residual_cascade::fit_residual_cascade`] (issue caveat 2)
884/// is the no-regression gate that refuses the iterative solve — and forces the
885/// caller back to the dense path — when a near-degenerate metric would break
886/// the BPX iteration bound.
887pub fn residual_cascade_fast_path(
888    request: &StandardFitRequest<'_>,
889) -> Option<ResidualCascadeInputs> {
890    if !request.family.is_gaussian_identity() {
891        return None;
892    }
893    if request.wiggle.is_some()
894        || request.latent_coord.is_some()
895        || !request.coefficient_groups.is_empty()
896        || !request.penalty_block_gamma_priors.is_empty()
897    {
898        return None;
899    }
900    let options = &request.options;
901    if options.latent_cloglog.is_some()
902        || options.mixture_link.is_some()
903        || options.sas_link.is_some()
904        || options.linear_constraints.is_some()
905        || options.adaptive_regularization.is_some()
906        || options.kronecker_penalty_system.is_some()
907        || options.kronecker_factored.is_some()
908        || options.firth_bias_reduction
909        || !options.nullspace_dims.is_empty()
910    {
911        return None;
912    }
913    let spec = &request.spec;
914    if !spec.linear_terms.is_empty()
915        || !spec.random_effect_terms.is_empty()
916        || spec.smooth_terms.len() != 1
917    {
918        return None;
919    }
920    let term = &spec.smooth_terms[0];
921    if !matches!(term.shape, gam_terms::smooth::ShapeConstraint::None)
922        || term.joint_null_rotation.is_some()
923    {
924        return None;
925    }
926    // Only scattered radial spatial smooths (Duchon / Matérn) over 2–3 axes.
927    // The Duchon spectral power `p + s` and the Matérn order set the requested
928    // Sobolev smoothness; both clamp into the Wendland native window.
929    let (feature_cols, requested_s) = match &term.basis {
930        gam_terms::smooth::SmoothBasisSpec::Duchon {
931            feature_cols, spec, ..
932        } => {
933            // Pure-Duchon native order is `p + s` (kernel exponent 2(p+s)−d);
934            // the multilevel frame targets the same continuum smoothness. `p`
935            // is the polynomial nullspace degree, `s` the spectral power.
936            let p = match spec.nullspace_order {
937                gam_terms::basis::DuchonNullspaceOrder::Zero => 0.0,
938                gam_terms::basis::DuchonNullspaceOrder::Linear => 1.0,
939                gam_terms::basis::DuchonNullspaceOrder::Degree(k) => k as f64,
940            };
941            (feature_cols, spec.power + p)
942        }
943        gam_terms::smooth::SmoothBasisSpec::Matern {
944            feature_cols, spec, ..
945        } => {
946            // Matérn smoothness ν sets native Sobolev order ν + d/2; the cascade
947            // frame represents up to (d+3)/2, so the clamp below applies the
948            // ceiling. (d is known just below from feature_cols.)
949            let nu = spec.nu.half_integer_value();
950            (feature_cols, nu + feature_cols.len() as f64 / 2.0)
951        }
952        _ => return None,
953    };
954    let d = feature_cols.len();
955    if !(2..=3).contains(&d) {
956        return None;
957    }
958    if request.offset.iter().any(|&v| v != 0.0) {
959        return None;
960    }
961    if request.weights.iter().any(|&v| !(v.is_finite() && v > 0.0)) {
962        return None;
963    }
964    let n = request.y.len();
965    if n != request.data.nrows() || feature_cols.iter().any(|&c| c >= request.data.ncols()) {
966        return None;
967    }
968    if !past_dense_kernel_cliff(n, d) {
969        return None;
970    }
971    let coords: Vec<Vec<f64>> = feature_cols
972        .iter()
973        .map(|&c| request.data.column(c).iter().copied().collect())
974        .collect();
975    let y: Vec<f64> = request.y.iter().copied().collect();
976    let w: Vec<f64> = request.weights.iter().copied().collect();
977    if coords
978        .iter()
979        .any(|axis| axis.iter().any(|v| !v.is_finite()))
980        || y.iter().any(|v| !v.is_finite())
981    {
982        return None;
983    }
984    let metric = vec![1.0_f64; d];
985    let sobolev_s = cascade_sobolev_order(requested_s, d);
986    Some(ResidualCascadeInputs {
987        coords,
988        y,
989        w,
990        metric,
991        sobolev_s,
992    })
993}
994
995/// Formula-level library entry for the O(n log n) residual-cascade fast path
996/// (issue #1032).
997///
998/// Materializes the formula exactly like [`fit_from_formula`], runs the
999/// [`residual_cascade_fast_path`] detection, and — when it fires AND the
1000/// quasi-uniformity guard inside the cascade certifies the metric — returns the
1001/// certified [`ResidualCascadeFit`](gam_solve::residual_cascade::ResidualCascadeFit).
1002/// `Ok(None)` means EITHER the model is not the cascade-eligible shape OR the
1003/// quasi-uniformity guard rejected the metric; in both cases the caller falls
1004/// back to the dense [`fit_from_formula`] path (the cascade is a different
1005/// posterior, so the fallback is a genuine estimator choice, never a silent
1006/// swap). This keeps every persistence-bearing consumer on the dense fit until
1007/// the cascade payload schema lands.
1008pub fn fit_residual_cascade_from_formula(
1009    formula: &str,
1010    data: &Dataset,
1011    config: &FitConfig,
1012) -> Result<Option<gam_solve::residual_cascade::ResidualCascadeFit>, WorkflowError> {
1013    let mat = materialize(formula, data, config)?;
1014    let FitRequest::Standard(request) = mat.request else {
1015        return Ok(None);
1016    };
1017    let Some(inputs) = residual_cascade_fast_path(&request) else {
1018        return Ok(None);
1019    };
1020    let coord_refs: Vec<&[f64]> = inputs.coords.iter().map(Vec::as_slice).collect();
1021    match gam_solve::residual_cascade::fit_residual_cascade(
1022        &coord_refs,
1023        &inputs.y,
1024        &inputs.w,
1025        &inputs.metric,
1026        inputs.sobolev_s,
1027    ) {
1028        Ok(fit) => Ok(Some(fit)),
1029        // The quasi-uniformity guard (caveat 2) and any degenerate-design
1030        // signal both surface as a build/solve error; treat them as "not
1031        // cascade-eligible" so the caller falls back to the dense kernel path
1032        // rather than failing the fit outright.
1033        Err(_) => Ok(None),
1034    }
1035}
1036
1037/// Parse a formula, resolve it against a dataset, and produce a ready-to-fit `FitRequest`.
1038pub fn materialize<'a>(
1039    formula: &str,
1040    data: &'a Dataset,
1041    config: &FitConfig,
1042) -> Result<MaterializedModel<'a>, WorkflowError> {
1043    gam_gpu::configure_global_policy(config.gpu_policy);
1044    let parsed = parse_formula(formula)?;
1045    let col_map = data.column_map();
1046
1047    if let Some((left_col, right_col, event_col)) = parse_surv_interval_response(&parsed.response)?
1048    {
1049        if config.transformation_normal {
1050            return Err(WorkflowError::InvalidConfig {
1051                reason:
1052                    "transformation_normal cannot be combined with a SurvInterval(...) response"
1053                        .to_string(),
1054            });
1055        }
1056        // Interval censoring `T ∈ (L, R]` is only defined for the latent
1057        // hazard-window survival likelihood, whose kernel carries the
1058        // `log[S(L) − S(R)]` interval contribution. Route the left boundary `L`
1059        // through the standard exit channel and the right boundary `R` through
1060        // the dedicated interval-right channel; `event_col` distinguishes
1061        // bracketed (interval) rows from right-censored rows beyond the last
1062        // inspection (which carry an infinite/sentinel `R`).
1063        materialize_survival(
1064            &parsed,
1065            data,
1066            &col_map,
1067            config,
1068            None,
1069            &left_col,
1070            &event_col,
1071            Some(&right_col),
1072        )
1073    } else if let Some((entry_col, exit_col, event_col)) = parse_surv_response(&parsed.response)? {
1074        if config.transformation_normal {
1075            return Err(WorkflowError::InvalidConfig {
1076                reason: "transformation_normal cannot be combined with a Surv(...) response"
1077                    .to_string(),
1078            });
1079        }
1080        // `materialize_*` now return `WorkflowError` directly so the typed
1081        // `ColumnNotFound` payload (and any future variant-typed leaf
1082        // errors) survive the dispatcher hop instead of being flattened
1083        // into `IntegrationFailed { reason: String }`.
1084        materialize_survival(
1085            &parsed,
1086            data,
1087            &col_map,
1088            config,
1089            entry_col.as_deref(),
1090            &exit_col,
1091            &event_col,
1092            None,
1093        )
1094    } else {
1095        // Non-survival response: `timewiggle(...)` and `survmodel(...)` are
1096        // structurally meaningless (there is no baseline hazard / time axis to
1097        // wiggle and no survival likelihood to configure). They are parsed into
1098        // `ParsedFormula` but consumed *only* by `materialize_survival`; without
1099        // this guard every non-survival materializer below would silently drop
1100        // them, fitting an ordinary GAM while the user believes they requested a
1101        // time-varying / survival model (#371). Reject here — the single
1102        // chokepoint for all non-survival paths — mirroring the symmetric
1103        // auxiliary-formula rejection in `validate_auxiliary_formula_controls`.
1104        reject_survival_only_terms_for_nonsurvival(&parsed)?;
1105        if config.transformation_normal {
1106            // Issue #789A: a Bernoulli marginal-slope request with
1107            // `transformation_normal=true` used to dispatch as a CTN fit while
1108            // retaining marginal-slope controls, leaving the transformation path
1109            // in a non-advancing loop. CTN score calibration now uses the
1110            // explicit `ctn_stage1` recipe instead, so the legacy boolean is a
1111            // hard configuration error for marginal-slope requests.
1112            reject_marginal_slope_controls_for_transformation_normal(config)?;
1113            if config.noise_formula.is_some() {
1114                return Err(WorkflowError::InvalidConfig {
1115                    reason: "transformation_normal cannot be combined with noise_formula"
1116                        .to_string(),
1117                });
1118            }
1119            materialize_transformation_normal(&parsed, data, &col_map, config)
1120        } else if config.logslope_formula.is_some() || config.z_column.is_some() {
1121            materialize_bernoulli_marginal_slope(&parsed, data, &col_map, config)
1122        } else if config.noise_formula.is_some() {
1123            materialize_location_scale(&parsed, data, &col_map, config)
1124        } else {
1125            materialize_standard(&parsed, data, &col_map, config)
1126        }
1127    }
1128}
gam_models/fit_orchestration/entry.rs

gam_models/fit_orchestration/
entry.rs