Skip to main content

gam_models/fit_orchestration/
entry.rs

1use super::*;
2
3/// Request-specific inputs to the canonical standard-fit `FitOptions`.
4///
5/// Everything in here varies per call (the link state extracted from the
6/// formula/config, the linear constraints synthesized from `bounded()` /
7/// shape-constrained terms, the Firth / adaptive-regularization toggles read
8/// off the `FitConfig`). Every *policy* field of `FitOptions` — the ones that
9/// decide HOW the outer REML optimization behaves (`compute_inference`,
10/// `skip_rho_posterior_inference`, `tol`, the `max_iter` default, the penalty
11/// shrinkage floor) — is filled in by [`canonical_standard_fit_options`] and is
12/// NOT settable here, so the CLI binary and the Python/PyO3 path cannot resolve
13/// a different optimization policy for the same model (#1196). Before this seam
14/// existed the CLI hand-built `FitOptions` with `tol: 1e-6` /
15/// `skip_rho_posterior_inference: false` while the formula path used
16/// `tol: 1e-10` / `skip_rho_posterior_inference: true`, so the identical model
17/// fit *differently* depending on which entry point you called it from — the
18/// exact class of divergence #1191 surfaced.
19#[derive(Default)]
20pub struct StandardFitOptionsInputs {
21    pub latent_cloglog: Option<LatentCLogLogState>,
22    pub mixture_link: Option<MixtureLinkSpec>,
23    pub optimize_mixture: bool,
24    pub sas_link: Option<SasLinkSpec>,
25    pub optimize_sas: bool,
26    pub linear_constraints: Option<gam_solve::pirls::LinearInequalityConstraints>,
27    pub firth_bias_reduction: bool,
28    pub adaptive_regularization: Option<AdaptiveRegularizationOptions>,
29    /// `Some` only when a caller (the forced-Firth CLI branch) overrides the
30    /// canonical default. `None` keeps the single-source default `Some(1e-6)`.
31    pub penalty_shrinkage_floor_override: Option<Option<f64>>,
32    pub persist_warm_start_disk: bool,
33}
34
35/// The single source of truth for standard-fit `FitOptions` *policy*.
36///
37/// Both standard-fit entry points — `materialize_standard` (the formula /
38/// Python / PyO3 path) and the `gam` CLI's `run_fit` — construct their
39/// `StandardFitRequest` options through this function, so the outer REML
40/// optimization policy (`compute_inference`, `skip_rho_posterior_inference`,
41/// `tol`, `max_iter` default, `penalty_shrinkage_floor`) is identical by
42/// construction. New policy fields must be set HERE, never re-derived at a call
43/// site, which is what makes Python/CLI behavioral divergence structurally
44/// impossible rather than enforced by parallel-but-equal code (#1196).
45pub fn canonical_standard_fit_options(
46    config: &FitConfig,
47    inputs: StandardFitOptionsInputs,
48) -> FitOptions {
49    FitOptions {
50        latent_cloglog: inputs.latent_cloglog,
51        mixture_link: inputs.mixture_link,
52        optimize_mixture: inputs.optimize_mixture,
53        sas_link: inputs.sas_link,
54        optimize_sas: inputs.optimize_sas,
55        // Posterior covariance is always computed so `predict --uncertainty`
56        // works for every family (the `COV_MAX_P` diagonal fallback caps cost).
57        compute_inference: true,
58        // Formula/CLI fits are the interactive/default path: keep coefficient
59        // covariance and the smoothing correction, and emit the CHEAP Tier-0
60        // live-rho posterior certificate (a handful of outer-criterion
61        // evaluations), which the optimizer surfaces regardless of this flag
62        // whenever it is cheaply available (#1810). This flag only suppresses the
63        // EXPENSIVE escalation tiers (Tier-1 quadrature / Tier-2 NUTS over rho),
64        // which could otherwise launch NUTS and turn ordinary fits into sampler
65        // benchmarks. Lower-level callers that explicitly need the escalation opt
66        // in elsewhere (`skip_rho_posterior_inference: false`).
67        skip_rho_posterior_inference: true,
68        max_iter: config.outer_max_iter.unwrap_or(200),
69        // Outer REML/LAML smoothing-selection tolerance. `1e-10` (effective
70        // projected-gradient threshold ≈ 1e-7) resolves λ̂ to optimiser
71        // precision and restores the `w=c ⇔ c-fold replication` invariance in
72        // smoothing selection (gam#893). The CLI previously used the stale
73        // `1e-6`, which over-smoothed relative to the formula path.
74        tol: 1e-10,
75        nullspace_dims: vec![],
76        linear_constraints: inputs.linear_constraints,
77        firth_bias_reduction: inputs.firth_bias_reduction,
78        adaptive_regularization: inputs.adaptive_regularization,
79        penalty_shrinkage_floor: inputs
80            .penalty_shrinkage_floor_override
81            .unwrap_or(Some(1e-6)),
82        rho_prior: Default::default(),
83        kronecker_penalty_system: None,
84        kronecker_factored: None,
85        persist_warm_start_disk: inputs.persist_warm_start_disk,
86    }
87}
88
89pub fn fit_model(request: FitRequest<'_>) -> Result<FitResult, WorkflowError> {
90    // Disk warm-start persistence is opt-in. The always-on in-memory warm start
91    // remains inside the fit engines, but the workflow dispatcher must not open
92    // the shared WarmStartStore for ordinary formula fits: refit-heavy quality
93    // tests get no cross-process reuse and previously paid cache lookup,
94    // checkpoint, and eviction scans on every replicate (#1082/#1114).
95    let request = request;
96    // Each `fit_*_model` helper still returns `Result<_, String>` internally;
97    // the boundary conversion happens here so the public API returns
98    // `WorkflowError::IntegrationFailed` carrying the underlying solver text.
99    let wrap_solver_err =
100        |reason: String| -> WorkflowError { WorkflowError::IntegrationFailed { reason } };
101    match request {
102        FitRequest::Standard(request) => fit_standard_model(request)
103            .map(FitResult::Standard)
104            .map_err(wrap_solver_err),
105        FitRequest::GaussianLocationScale(request) => fit_gaussian_location_scale_model(request)
106            .map(FitResult::GaussianLocationScale)
107            .map_err(wrap_solver_err),
108        FitRequest::BinomialLocationScale(request) => fit_binomial_location_scale_model(request)
109            .map(FitResult::BinomialLocationScale)
110            .map_err(wrap_solver_err),
111        FitRequest::DispersionLocationScale(request) => {
112            fit_dispersion_location_scale_model(request)
113                .map(FitResult::DispersionLocationScale)
114                .map_err(wrap_solver_err)
115        }
116        FitRequest::SurvivalLocationScale(request) => fit_survival_location_scale_model(request)
117            .map(FitResult::SurvivalLocationScale)
118            .map_err(wrap_solver_err),
119        FitRequest::SurvivalTransformation(request) => fit_survival_transformation_model(request)
120            .map(FitResult::SurvivalTransformation)
121            .map_err(wrap_solver_err),
122        FitRequest::BernoulliMarginalSlope(request) => fit_bernoulli_marginal_slope_model(request)
123            .map(FitResult::BernoulliMarginalSlope)
124            .map_err(wrap_solver_err),
125        FitRequest::SurvivalMarginalSlope(request) => fit_survival_marginal_slope_model(request)
126            .map(FitResult::SurvivalMarginalSlope)
127            .map_err(wrap_solver_err),
128        FitRequest::LatentSurvival(request) => fit_latent_survival_model(request)
129            .map(FitResult::LatentSurvival)
130            .map_err(wrap_solver_err),
131        FitRequest::LatentBinary(request) => fit_latent_binary_model(request)
132            .map(FitResult::LatentBinary)
133            .map_err(wrap_solver_err),
134        FitRequest::TransformationNormal(request) => fit_transformation_normal_model(request)
135            .map(FitResult::TransformationNormal)
136            .map_err(wrap_solver_err),
137    }
138}
139/// Resolve the [`gam_runtime::resource::ResourcePolicy`] backing term construction
140/// for a given [`FitConfig`] + dataset.
141///
142/// If the caller hasn't supplied an explicit policy override, derive one from
143/// the shape of the problem via
144/// [`gam_runtime::resource::ResourcePolicy::for_problem`]. At large scale (n_rows
145/// >= 100k or the marginal-slope large-scale path active) this returns
146/// > `analytic_operator_required` so that any silent dense materialization in
147/// > the term-construction layer fails fast rather than allocating tens of GiB;
148/// > at small scale it falls through to the permissive default-library policy
149/// > so that non-operator bases still build cleanly.
150///
151/// `p_estimate = 0` because the per-block coefficient count isn't known until
152/// the spec has been built; the n_rows and hints triggers are sufficient to
153/// flip strict mode for every shape that needs it.
154pub(crate) fn resolved_resource_policy(
155    config: &FitConfig,
156    data: &Dataset,
157    hints: gam_runtime::resource::ProblemHints,
158) -> gam_runtime::resource::ResourcePolicy {
159    if let Some(p) = config.resource_policy.clone() {
160        return p;
161    }
162    gam_runtime::resource::ResourcePolicy::for_problem(data.values.nrows(), 0, hints)
163}
164
165pub(crate) fn marginal_slope_hints(config: &FitConfig) -> gam_runtime::resource::ProblemHints {
166    gam_runtime::resource::ProblemHints {
167        marginal_slope_large_scale_active: config.logslope_formula.is_some()
168            || config.z_column.is_some(),
169    }
170}
171/// Parse, materialize, and fit a model in one call.
172/// Resolve the expectile asymmetry `τ` requested by `config`, if any.
173///
174/// Returns `Ok(Some(τ))` when `config.family` is `"expectile"` (optionally with
175/// an inline asymmetry, `"expectile(0.9)"`), `Ok(None)` for every other family,
176/// and `Err` when an expectile request carries an out-of-range `τ`. The inline
177/// form takes precedence over the explicit [`FitConfig::expectile_tau`] field
178/// only when both are present and disagree is rejected as a contradiction; when
179/// neither pins `τ`, the median expectile `τ = 0.5` (the ordinary mean fit) is
180/// the default.
181fn expectile_tau_for_config(config: &FitConfig) -> Result<Option<f64>, WorkflowError> {
182    let Some(raw) = config.family.as_deref() else {
183        return Ok(None);
184    };
185    let trimmed = raw.trim();
186    let lower = trimmed.to_ascii_lowercase();
187    if !(lower == "expectile" || lower.starts_with("expectile(")) {
188        return Ok(None);
189    }
190    let invalid = |reason: String| WorkflowError::InvalidConfig { reason };
191    // Optional inline asymmetry: `expectile(0.9)`.
192    let inline_tau = if let Some(rest) = lower.strip_prefix("expectile(") {
193        let inner = rest.strip_suffix(')').ok_or_else(|| {
194            invalid(format!(
195                "expectile family asymmetry must be written as `expectile(τ)`; got `{trimmed}`"
196            ))
197        })?;
198        let value: f64 = inner.trim().parse().map_err(|_| {
199            invalid(format!(
200                "expectile asymmetry `{}` is not a finite number",
201                inner.trim()
202            ))
203        })?;
204        Some(value)
205    } else {
206        None
207    };
208    let tau = match (inline_tau, config.expectile_tau) {
209        (Some(a), Some(b)) if (a - b).abs() > 0.0 => {
210            return Err(invalid(format!(
211                "expectile asymmetry given both inline (`expectile({a})`) and via expectile_tau \
212                 ({b}); supply exactly one"
213            )));
214        }
215        (Some(a), _) => a,
216        (None, Some(b)) => b,
217        (None, None) => 0.5,
218    };
219    if !(tau.is_finite() && tau > 0.0 && tau < 1.0) {
220        return Err(invalid(format!(
221            "expectile asymmetry τ must be finite and strictly in (0, 1); got {tau}"
222        )));
223    }
224    Ok(Some(tau))
225}
226
227/// Per-row asymmetric LAWS weight `wᵢ(τ) = τ` if `yᵢ > μᵢ` else `1 − τ`, scaled
228/// by the base prior weight. At the boundary `yᵢ = μᵢ` the two half-weights
229/// agree in the limit only at `τ = 0.5`; the convention `yᵢ > μᵢ ⇒ τ` (strict)
230/// matches Newey–Powell's lower-closed asymmetric loss and is what `expectreg`
231/// uses. The fixed point is independent of the tie convention because ties form
232/// a measure-zero set under any continuous response.
233fn expectile_row_weights(
234    y: ArrayView1<f64>,
235    mu: ArrayView1<f64>,
236    base: ArrayView1<f64>,
237    tau: f64,
238) -> Array1<f64> {
239    Array1::from_shape_fn(y.len(), |i| {
240        let asym = if y[i] > mu[i] { tau } else { 1.0 - tau };
241        base[i] * asym
242    })
243}
244
245fn constant_gaussian_standard_fit(
246    request: &StandardFitRequest<'_>,
247) -> Result<StandardFitResult, WorkflowError> {
248    if !request.family.is_gaussian_identity() || request.y.is_empty() {
249        return Err(WorkflowError::InvalidConfig {
250            reason: "constant Gaussian shortcut requires a non-empty Gaussian identity request"
251                .to_string(),
252        });
253    }
254    if request.y.iter().any(|value| !value.is_finite())
255        || request.offset.iter().any(|value| !value.is_finite())
256        || request
257            .weights
258            .iter()
259            .any(|value| !value.is_finite() || *value < 0.0)
260    {
261        return Err(WorkflowError::InvalidConfig {
262            reason: "constant Gaussian shortcut requires finite response, offset, and non-negative weights"
263                .to_string(),
264        });
265    }
266    let weight_sum = request.weights.sum();
267    if !(weight_sum.is_finite() && weight_sum > 0.0) {
268        return Err(WorkflowError::InvalidConfig {
269            reason: "constant Gaussian shortcut requires positive total weight".to_string(),
270        });
271    }
272    let mut centered_sum = 0.0_f64;
273    for i in 0..request.y.len() {
274        centered_sum += request.weights[i] * (request.y[i] - request.offset[i]);
275    }
276    let intercept = centered_sum / weight_sum;
277    let design =
278        build_term_collection_design(request.data.view(), &request.spec).map_err(|err| {
279            WorkflowError::InvalidConfig {
280                reason: format!("constant Gaussian shortcut could not rebuild design: {err}"),
281            }
282        })?;
283    let p = design.design.ncols();
284    let mut beta = Array1::<f64>::zeros(p);
285    for col in design.intercept_range.clone() {
286        if col < p {
287            beta[col] = intercept;
288        }
289    }
290    let lambdas = Array1::<f64>::ones(design.penalties.len());
291    let log_lambdas = Array1::<f64>::zeros(design.penalties.len());
292    let fit =
293        gam_solve::estimate::UnifiedFitResult::try_from_parts(gam_solve::estimate::UnifiedFitResultParts {
294            blocks: vec![gam_solve::estimate::FittedBlock {
295                beta: beta.clone(),
296                role: gam_problem::BlockRole::Mean,
297                edf: design.intercept_range.len() as f64,
298                lambdas: lambdas.clone(),
299            }],
300            log_lambdas,
301            lambdas,
302            likelihood_family: Some(request.family.clone()),
303            likelihood_scale: gam_problem::LikelihoodScaleMetadata::ProfiledGaussian,
304            log_likelihood_normalization: gam_problem::LogLikelihoodNormalization::UserProvided,
305            log_likelihood: 0.0,
306            deviance: 0.0,
307            reml_score: 0.0,
308            stable_penalty_term: 0.0,
309            penalized_objective: 0.0,
310            used_device: false,
311            outer_iterations: 0,
312            outer_converged: true,
313            outer_gradient_norm: Some(0.0),
314            standard_deviation: 0.0,
315            covariance_conditional: None,
316            covariance_corrected: None,
317            inference: None,
318            fitted_link: gam_solve::estimate::FittedLinkState::Standard(None),
319            geometry: None,
320            block_states: Vec::new(),
321            pirls_status: gam_solve::pirls::PirlsStatus::Converged,
322            max_abs_eta: intercept.abs(),
323            constraint_kkt: None,
324            artifacts: gam_solve::estimate::FitArtifacts {
325                pirls: None,
326                ..Default::default()
327            },
328            inner_cycles: 0,
329        })
330        .map_err(|err| WorkflowError::IntegrationFailed {
331            reason: format!("constant Gaussian shortcut produced invalid fit: {err}"),
332        })?;
333    let resolvedspec =
334        freeze_term_collection_from_design(&request.spec, &design).map_err(|err| {
335            WorkflowError::InvalidConfig {
336                reason: format!("constant Gaussian shortcut could not freeze design: {err}"),
337            }
338        })?;
339    Ok(StandardFitResult {
340        fit,
341        design,
342        resolvedspec,
343        adaptive_diagnostics: None,
344        kappa_timing: None,
345        saved_link_state: gam_solve::estimate::FittedLinkState::Standard(None),
346        wiggle_knots: None,
347        wiggle_degree: None,
348        wiggle_saved_warp_beta: None,
349    })
350}
351
352fn gaussian_response_is_constant(request: &StandardFitRequest<'_>) -> bool {
353    if !request.family.is_gaussian_identity()
354        || request.y.is_empty()
355        || request.y.iter().any(|value| !value.is_finite())
356    {
357        return false;
358    }
359    let (lo, hi) = request
360        .y
361        .iter()
362        .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), &value| {
363            (lo.min(value), hi.max(value))
364        });
365    (hi - lo).abs() <= 1.0e-12 * hi.abs().max(1.0)
366}
367
368pub fn fit_from_formula(
369    formula: &str,
370    data: &Dataset,
371    config: &FitConfig,
372) -> Result<FitResult, WorkflowError> {
373    // Expectile regression (Newey–Powell asymmetric least squares): when the
374    // family resolves to "expectile", the τ-expectile of `y | x` is the
375    // minimizer of `Σ wᵢ(τ)·(yᵢ − μᵢ)²`, `wᵢ(τ) = τ` if `yᵢ > μᵢ` else `1 − τ`
376    // — the smooth analogue of the τ-quantile. The minimizer is a Least
377    // Asymmetrically Weighted Squares (LAWS) fixed point: iterate the penalized
378    // Gaussian-identity GAM with `wᵢ(τ)` recomputed from the current `μᵢ` until
379    // the residual-sign pattern stabilizes. REML λ-selection runs inside each
380    // inner Gaussian solve, so every gam smooth/tensor/spatial basis becomes a
381    // penalized expectile smooth with data-driven smoothing for free. This is a
382    // genuine estimator route, not a silent swap: it fires only on the explicit
383    // `family = "expectile"`. Every other family falls through unchanged.
384    if let Some(result) = fit_expectile_if_requested(formula, data, config)? {
385        return Ok(FitResult::Standard(result));
386    }
387    let mat = materialize(formula, data, config)?;
388    // Exact O(n) spline-scan fast path (#1030): when the materialized request
389    // is the single 1-D Gaussian-identity penalized-smooth shape the
390    // state-space scan solves exactly, route through it and return the
391    // scan-bearing model directly — the same penalized posterior at O(n) per
392    // λ-trial instead of the dense design/Gram route. Detection is structural
393    // and conservative (see `spline_scan_fast_path`); every other shape falls
394    // through to the dense `fit_model` path unchanged. Mirrors the CLI
395    // (main.rs run_fit) and FFI consumers, which build the persistence payload
396    // from this same `SplineScanFit`.
397    if let FitRequest::Standard(request) = &mat.request {
398        if gaussian_response_is_constant(request) {
399            return constant_gaussian_standard_fit(request).map(FitResult::Standard);
400        }
401        if let Some(inputs) = spline_scan_fast_path(request) {
402            let scan = gam_solve::spline_scan::fit_spline_scan(
403                &inputs.x,
404                &inputs.y,
405                &inputs.w,
406                inputs.order,
407            )
408            .map_err(|reason| WorkflowError::IntegrationFailed { reason })?;
409            return Ok(FitResult::SplineScan(scan));
410        }
411        // O(n log n) multiresolution residual-cascade fast path (#1032): a
412        // scattered low-d Gaussian-identity Duchon/Matérn smooth past the
413        // dense-kernel cliff. UNLIKE the scan, the cascade is a DIFFERENT
414        // posterior from the dense radial term, so it only ever fires as an
415        // explicit alternative estimator on the exact structural signature
416        // (`residual_cascade_fast_path`) AND when the in-cascade quasi-uniformity
417        // guard certifies the metric — a rejected metric or any ineligible shape
418        // falls through to the dense `fit_model` path (a genuine estimator
419        // choice, never a silent swap). The save paths build the persistence
420        // payload from this `ResidualCascadeFit`'s `to_state` snapshot.
421        if let Some(inputs) = residual_cascade_fast_path(request) {
422            let coord_refs: Vec<&[f64]> = inputs.coords.iter().map(Vec::as_slice).collect();
423            if let Ok(fit) = gam_solve::residual_cascade::fit_residual_cascade(
424                &coord_refs,
425                &inputs.y,
426                &inputs.w,
427                &inputs.metric,
428                inputs.sobolev_s,
429            ) {
430                return Ok(FitResult::ResidualCascade(fit));
431            }
432            // The quasi-uniformity guard (caveat 2) or any degenerate-design
433            // signal surfaces as a build/solve error; fall through to the dense
434            // kernel path rather than failing the fit outright.
435        }
436    }
437    // `fit_model` already returns `WorkflowError` end-to-end; propagate it
438    // directly instead of stringifying then re-wrapping.
439    fit_model(mat.request)
440}
441
442/// THE single dispatch seam for the expectile (Newey–Powell LAWS) family.
443///
444/// Returns `Ok(Some(result))` with the converged τ-expectile as an ordinary
445/// [`StandardFitResult`] when `config.family` selects the expectile family
446/// (`"expectile"` or `"expectile(τ)"`, optionally pinned by
447/// [`FitConfig::expectile_tau`]), `Ok(None)` for every other family — in which
448/// case the caller runs its normal materialize/`fit_model` path — and `Err` on a
449/// malformed expectile request or an inner-fit failure.
450///
451/// Every public entry point that resolves a family routes through this seam
452/// *before* materializing: the in-process [`fit_from_formula`], the Python FFI
453/// (`gam-pyffi`), and the `gam` CLI. Centralizing the dispatch here is what makes
454/// the estimator reachable from every interface instead of only the library
455/// call — and what prevents the class of bug where a newly-added outer estimator
456/// is wired into one entry point and silently bypassed by the others (#1777).
457/// The returned [`StandardFitResult`] carries the full design / resolved spec /
458/// fit, so each caller builds its persistence payload from it exactly as it does
459/// for any other standard fit.
460pub fn fit_expectile_if_requested(
461    formula: &str,
462    data: &Dataset,
463    config: &FitConfig,
464) -> Result<Option<StandardFitResult>, WorkflowError> {
465    match expectile_tau_for_config(config)? {
466        Some(tau) => Ok(Some(fit_expectile_laws(formula, data, config, tau)?)),
467        None => Ok(None),
468    }
469}
470
471/// Least Asymmetrically Weighted Squares (LAWS) driver for expectile GAMs.
472///
473/// The τ-expectile surface minimizes `Σ wᵢ(τ)·(yᵢ − μᵢ)²` with the residual-
474/// sign asymmetric weight `wᵢ(τ)`. Because that weight is piecewise-constant in
475/// `sign(yᵢ − μᵢ)`, the objective is the supremum of a finite family of
476/// weighted least-squares problems and its minimizer is the unique fixed point
477/// of: *solve the penalized WLS with weights frozen at the current sign
478/// pattern, then recompute the sign pattern from the new fit*. The asymmetric
479/// loss is strictly convex (weights bounded in `[min(τ,1−τ), max(τ,1−τ)] > 0`),
480/// so this monotone-descent iteration converges, and since the sign pattern
481/// takes finitely many values it stabilizes in finitely many steps (Schnabel &
482/// Eilers 2009; the same Newton/IRLS-for-expectiles `expectreg` runs).
483///
484/// Each inner solve is the FULL standard Gaussian-identity GAM: any basis,
485/// tensor, spatial smooth, by-variable, random effect, plus REML λ-selection on
486/// the current asymmetric weights. The returned fit is an ordinary
487/// [`FitResult::Standard`] whose coefficients ARE the penalized τ-expectile —
488/// every downstream consumer (predict, posterior bands, persistence) works
489/// unchanged. The reported scale is the asymmetric working variance, so
490/// expectile standard errors are the sandwich-free Gaussian-form bands of the
491/// converged weighted problem (a deliberate first-rung choice; see #1100).
492fn fit_expectile_laws(
493    formula: &str,
494    data: &Dataset,
495    config: &FitConfig,
496    tau: f64,
497) -> Result<StandardFitResult, WorkflowError> {
498    use gam_linalg::matrix::LinearOperator;
499
500    if config.frailty.as_ref().is_some_and(FrailtySpec::is_active) {
501        return Err(WorkflowError::InvalidConfig {
502            reason: "expectile regression does not support frailty; use a survival/frailty-aware family instead"
503                .to_string(),
504        });
505    }
506
507    // Inner fits are ordinary Gaussian-identity GAMs; the τ asymmetry lives
508    // entirely in the per-iteration prior weights this driver injects.
509    let gaussian_config = FitConfig {
510        family: Some("gaussian".to_string()),
511        link: Some("identity".to_string()),
512        expectile_tau: None,
513        // The inner Gaussian-identity design carries no frailty. Normalize the
514        // CLI/config-layer null value (`Some(FrailtySpec::None)`) to `None` so
515        // the expectile driver does not leak survival-only plumbing into the
516        // standard-family materializer, while the active-frailty guard above
517        // still rejects unsupported frailty requests explicitly.
518        frailty: None,
519        ..config.clone()
520    };
521
522    // Materialize once to capture the fixed training design, response, offset,
523    // and base prior weights. The design (basis, penalties, identifiability
524    // transforms) does not depend on the prior weights, so it is reused across
525    // every LAWS iteration; only the weight vector and the resulting β change.
526    let base_mat = materialize(formula, data, &gaussian_config)?;
527    let FitRequest::Standard(base_request) = base_mat.request else {
528        return Err(WorkflowError::InvalidConfig {
529            reason: "expectile regression is only defined for standard (non-survival, \
530                     non-location-scale) responses"
531                .to_string(),
532        });
533    };
534    let StandardFitRequest {
535        data: design_data,
536        y,
537        weights: base_weights,
538        offset,
539        spec,
540        family: materialized_family,
541        estimate_tweedie_p: _,
542        options,
543        kappa_options,
544        wiggle,
545        coefficient_groups,
546        penalty_block_gamma_priors,
547        latent_coord,
548        _marker,
549    } = base_request;
550    // The materializer already resolved the inner family to Gaussian-identity
551    // from `gaussian_config`; assert it so a future materializer change that
552    // silently picked a different family for `"gaussian"` is caught here rather
553    // than producing a non-expectile fit.
554    if !materialized_family.is_gaussian_identity() {
555        return Err(WorkflowError::InvalidConfig {
556            reason: format!(
557                "expectile LAWS requires a Gaussian-identity inner family; materializer produced {}",
558                materialized_family.name()
559            ),
560        });
561    }
562
563    if wiggle.is_some() || latent_coord.is_some() {
564        return Err(WorkflowError::InvalidConfig {
565            reason: "expectile regression does not support flexible-link wiggle or latent \
566                     coordinates"
567                .to_string(),
568        });
569    }
570
571    let n = y.len();
572    let gaussian_family = LikelihoodSpec::gaussian_identity();
573    // Cold start: τ = 0.5 (symmetric) weights ⇒ the first inner fit is the OLS
574    // mean GAM, the natural warm start for any τ.
575    let mut weights = base_weights.clone();
576    let mut last_sign: Option<Vec<bool>> = None;
577    let mut last_result: Option<StandardFitResult> = None;
578
579    // The sign pattern has 2ⁿ values but LAWS visits a monotone-descent subset;
580    // empirically a handful of iterations suffice. The cap is a safety guard:
581    // on the rare oscillation between two equal-objective sign patterns (only
582    // possible when rows sit exactly on the fitted surface) the last fit is a
583    // valid τ-expectile of the perturbation-stable problem, so returning it is
584    // correct rather than an error.
585    const MAX_LAWS_ITERS: usize = 50;
586
587    for _iter in 0..MAX_LAWS_ITERS {
588        let request = StandardFitRequest {
589            data: design_data.clone(),
590            y: y.clone(),
591            weights: weights.clone(),
592            offset: offset.clone(),
593            spec: spec.clone(),
594            family: gaussian_family.clone(),
595            // Expectile LAWS fits a Gaussian-identity inner family; no Tweedie
596            // power to estimate (#2026).
597            estimate_tweedie_p: false,
598            options: options.clone(),
599            kappa_options: kappa_options.clone(),
600            wiggle: None,
601            coefficient_groups: coefficient_groups.clone(),
602            penalty_block_gamma_priors: penalty_block_gamma_priors.clone(),
603            latent_coord: None,
604            _marker,
605        };
606        let result = fit_standard_model(request)
607            .map_err(|reason| WorkflowError::IntegrationFailed { reason })?;
608
609        // Training-scale fitted mean μ = X·β (identity link, zero-checked
610        // offset folded by the design path). The design columns match the
611        // combined coefficient vector exactly (the same contract `predict`
612        // and the safety tests rely on).
613        let mu = result.design.design.apply(&result.fit.beta);
614        if mu.len() != n {
615            return Err(WorkflowError::IntegrationFailed {
616                reason: format!(
617                    "expectile LAWS: fitted mean length {} disagrees with response length {n}",
618                    mu.len()
619                ),
620            });
621        }
622        let mut mu_off = mu;
623        mu_off += &offset;
624
625        let sign: Vec<bool> = (0..n).map(|i| y[i] > mu_off[i]).collect();
626        let converged = last_sign.as_ref().is_some_and(|prev| prev == &sign);
627        weights = expectile_row_weights(y.view(), mu_off.view(), base_weights.view(), tau);
628        last_sign = Some(sign);
629        last_result = Some(result);
630        if converged {
631            break;
632        }
633    }
634
635    let result = last_result.ok_or_else(|| WorkflowError::IntegrationFailed {
636        reason: "expectile LAWS produced no fit".to_string(),
637    })?;
638    Ok(result)
639}
640/// Detection seam for the exact O(n) cubic-smoothing-spline fast path.
641///
642/// This is the EARLIEST point in the standard workflow where a materialized
643/// fit request carries everything needed to prove the model is exactly the
644/// problem the scan solves: a Gaussian likelihood with identity link over
645/// `intercept + one 1-D cubic-class penalized smooth` — i.e. the penalized
646/// least-squares problem `min Σ w_i (y_i − f(x_i))² + λ∫f″²` with an
647/// unpenalized `{1, x}` null space. The Kalman/RTS scan computes that
648/// posterior (mean, pointwise variance, exact diffuse REML for λ) in O(n) per
649/// λ-trial instead of the dense design/Gram O(n·k²) + O(k³) route.
650///
651/// Returns `Some` only when ALL of the following hold; everything else falls
652/// through to the dense path:
653/// - family is Gaussian + identity link;
654/// - no link wiggle, no latent coordinates, no coefficient groups, no penalty
655///   hyperpriors, no linear/box constraints, no Firth, no adaptive
656///   regularization, no Kronecker systems, no externally injected null-space
657///   dims;
658/// - the term collection is exactly one smooth term — no linear terms, no
659///   random effects, no by-variables / factor interactions;
660/// - that smooth is a plain 1-D B-spline whose penalty order is compatible
661///   with the exact scan and whose null space is unshrunk
662///   (`double_penalty=false`). `double_penalty` (mgcv `select = TRUE`) on a free
663///   B-spline emits a second REML coordinate — the Marra & Wood (2011) null-space
664///   shrinkage block — that the scan cannot represent (its polynomial null space
665///   is an improper diffuse prior it can never shrink); routing such a fit
666///   through the scan would silently drop that penalty and select λ from the
667///   bending penalty alone, which is exactly the EDF inflation #1266 reports.
668///   Those fits fall through to the dense two-rho path, which owns both penalties
669///   jointly. Natural cubic regression (`bs="cr"`/`"cs"`) terms also fall
670///   through: their knot-value parameterization is a finite-rank regression
671///   spline, not the scan's full smoothing-spline state-space posterior;
672/// - the offset is identically zero and every weight is finite and positive;
673/// - at least 3 distinct finite abscissae (the scan's diffuse rank plus one).
674///
675/// λ-mapping note: the scan's penalty is exactly `λ∫f″²` (state-space
676/// `q = 1/λ` at unit σ²). The dense 1-D B-spline path penalizes the same
677/// cubic class through a reduced-rank discrete-difference Gram whose
678/// normalization differs by a basis-dependent constant, so a λ selected by
679/// one parameterization does not transfer numerically to the other. The scan
680/// therefore always re-selects λ by its own exact diffuse REML criterion
681/// (the optimizer of the same restricted likelihood, expressed in the scan's
682/// parameterization); user-pinned smoothing parameters are not representable
683/// at this seam (the formula DSL exposes none for this term class), so no
684/// pinned-λ mapping arises.
685///
686/// Identifiability transforms on the smooth (centering / linear-trend
687/// removal / orthogonality-to-intercept) are accepted as eligible: they only
688/// re-coordinate the unpenalized null space against the implicit intercept
689/// and do not change the fitted posterior of `E[y|x]`, which is what the
690/// scan returns directly.
691pub fn spline_scan_fast_path(request: &StandardFitRequest<'_>) -> Option<SplineScanInputs> {
692    if !request.family.is_gaussian_identity() {
693        return None;
694    }
695    if request.wiggle.is_some()
696        || request.latent_coord.is_some()
697        || !request.coefficient_groups.is_empty()
698        || !request.penalty_block_gamma_priors.is_empty()
699    {
700        return None;
701    }
702    let options = &request.options;
703    if options.latent_cloglog.is_some()
704        || options.mixture_link.is_some()
705        || options.sas_link.is_some()
706        || options.linear_constraints.is_some()
707        || options.adaptive_regularization.is_some()
708        || options.kronecker_penalty_system.is_some()
709        || options.kronecker_factored.is_some()
710        || options.firth_bias_reduction
711        || !options.nullspace_dims.is_empty()
712    {
713        return None;
714    }
715    let spec = &request.spec;
716    if !spec.linear_terms.is_empty()
717        || !spec.random_effect_terms.is_empty()
718        || spec.smooth_terms.len() != 1
719    {
720        return None;
721    }
722    let term = &spec.smooth_terms[0];
723    if !matches!(term.shape, gam_terms::smooth::ShapeConstraint::None)
724        || term.joint_null_rotation.is_some()
725    {
726        return None;
727    }
728    let gam_terms::smooth::SmoothBasisSpec::BSpline1D {
729        feature_col,
730        spec: bspec,
731    } = &term.basis
732    else {
733        return None;
734    };
735    // Smoothing-spline order m = penalty_order ∈ {1, 2, 3}. The exact scan
736    // integrates the order-m integrated-Wiener prior whose natural spline has
737    // degree 2m−1 (m=1 → linear, m=2 → cubic, m=3 → quintic), so require that
738    // degree to match user intent. The de Jong exact diffuse leading-block
739    // smoother (#1044) handles the m−1 partially-diffuse leading nodes for all
740    // m ≤ MAX_ORDER; m > MAX_ORDER falls through to the dense path.
741    let order = bspec.penalty_order;
742    // Double-penalty (mgcv `select = TRUE`) is NOT representable by the scan and
743    // must fall through to the dense two-rho path (#1266). On a free B-spline the
744    // double penalty emits a *second* REML coordinate — the Marra & Wood (2011)
745    // null-space shrinkage block `Z Zᵀ` (see `bspline_penalty_candidates`) —
746    // whose entire purpose is to let REML shrink the unpenalized `{1, x, …}`
747    // polynomial null space toward `EDF → 0` for an unsupported term. The scan,
748    // by construction, carries that null space as an *improper diffuse* prior it
749    // can never shrink (its EDF floor is the null-space dimension `order`), so
750    // routing a `double_penalty` fit through it silently DROPS the second penalty
751    // and selects λ from the single bending penalty alone. The scan's own exact
752    // diffuse REML then genuinely prefers a mildly wiggly fit at finite λ for
753    // some noise realizations (an interior REML optimum, EDF ≈ 3–4), which is the
754    // EDF inflation #1266 reports. The dense path owns both penalties jointly and
755    // its outer REML, seeded into the over-smoothing basin, drives the null space
756    // out (EDF → null-space dim) when the data are truly polynomial. Excluding
757    // `double_penalty` here keeps such a fit on the dense path; single-penalty
758    // and boundary-conditioned single-penalty B-splines keep the exact O(n) scan.
759    if !(1..=3).contains(&order)
760        || bspec.degree != 2 * order - 1
761        || bspec.double_penalty
762        || !bspec.boundary_conditions.is_free()
763        || !matches!(bspec.boundary, gam_terms::basis::OneDimensionalBoundary::Open)
764        || matches!(
765            bspec.knotspec,
766            gam_terms::basis::BSplineKnotSpec::PeriodicUniform { .. }
767                | gam_terms::basis::BSplineKnotSpec::NaturalCubicRegression { .. }
768        )
769        // mgcv `bs="cr"`/`"cs"` materialise a `NaturalCubicRegression` value-knot
770        // spec: a Lancaster–Salkauskas cubic-regression basis whose columns
771        // index `f(x*_i)` at `k` quantile knots — a genuinely DIFFERENT finite
772        // basis (and hence a different penalized posterior) from the free
773        // integrated-Wiener natural spline the exact scan solves on the raw data
774        // points. The scan builds its own knots from `x` and ignores this spec,
775        // so routing a cr fit through it would silently solve the wrong model and
776        // (per #1844) return a non-`Standard` `SplineScan` result the predict-time
777        // design replay cannot reconstruct. Keep cr/cs on the dense path.
778        || matches!(
779            bspec.knotspec,
780            gam_terms::basis::BSplineKnotSpec::NaturalCubicRegression { .. }
781        )
782    {
783        return None;
784    }
785    if request.offset.iter().any(|&v| v != 0.0) {
786        return None;
787    }
788    if request.weights.iter().any(|&v| !(v.is_finite() && v > 0.0)) {
789        return None;
790    }
791    if *feature_col >= request.data.ncols() || request.y.len() != request.data.nrows() {
792        return None;
793    }
794    let x: Vec<f64> = request.data.column(*feature_col).iter().copied().collect();
795    let y: Vec<f64> = request.y.iter().copied().collect();
796    let w: Vec<f64> = request.weights.iter().copied().collect();
797    if x.iter().any(|v| !v.is_finite()) || y.iter().any(|v| !v.is_finite()) {
798        return None;
799    }
800    // The diffuse polynomial null space consumes `order` innovations; the scan
801    // needs at least one proper innovation beyond them to profile σ².
802    let mut sorted = x.clone();
803    sorted.sort_by(f64::total_cmp);
804    sorted.dedup();
805    if sorted.len() < order + 1 {
806        return None;
807    }
808    Some(SplineScanInputs { x, y, w, order })
809}
810
811/// Formula-level entry for the exact O(n) cubic-smoothing-spline fast path.
812///
813/// Materializes the formula exactly like [`fit_from_formula`], then runs the
814/// [`spline_scan_fast_path`] detection on the resulting standard request.
815/// When detection fires the fit is routed through
816/// [`gam_solve::spline_scan::fit_spline_scan`] — the exact diffuse
817/// REML Kalman/RTS scan — and the full in-memory posterior
818/// ([`gam_solve::spline_scan::SplineScanFit`]: knots, smoothed
819/// states, pointwise variances, lag-one gains, σ², log λ, exact EDF, and an
820/// exact `predict`) is returned. `Ok(None)` means the model is not the
821/// scan-eligible shape and the caller should use the dense
822/// [`fit_from_formula`] path; this keeps every persistence-bearing consumer
823/// (model save, CLI, FFI) transparently on the dense fit, whose saved payload
824/// the scan does not yet have a schema for.
825pub fn fit_spline_scan_from_formula(
826    formula: &str,
827    data: &Dataset,
828    config: &FitConfig,
829) -> Result<Option<gam_solve::spline_scan::SplineScanFit>, WorkflowError> {
830    let mat = materialize(formula, data, config)?;
831    let FitRequest::Standard(request) = mat.request else {
832        return Ok(None);
833    };
834    let Some(inputs) = spline_scan_fast_path(&request) else {
835        return Ok(None);
836    };
837    gam_solve::spline_scan::fit_spline_scan(&inputs.x, &inputs.y, &inputs.w, inputs.order)
838        .map(Some)
839        .map_err(|reason| WorkflowError::IntegrationFailed { reason })
840}
841
842/// #1464 diagnostic entry point: evaluate the EXACT production fixed-κ
843/// profiled-REML criterion (`fixed_kappa_profiled_reml_score`, the same one the
844/// joint-fit κ-sign scan uses) at a list of pinned κ values for the first
845/// constant-curvature term of `formula`, materialised from `data`/`config`
846/// exactly like [`fit_from_formula`]. Returns `(κ, V_p(κ))` pairs.
847///
848/// This settles solver-vs-criterion for the railing bug: if `V_p(+κ) < V_p(−κ)`
849/// for a genuinely HYPERBOLIC dataset, the criterion itself prefers the collapsed
850/// +κ corner — the bug is in the constant-curvature REML/Occam term, not the
851/// optimiser. If `V_p(−κ) < V_p(+κ)` yet the full fit still returns +κ, the bug
852/// is in the solver/readback. The profiled fit pins κ and profiles only ρ
853/// (κ-optimisation disabled), so each returned score is the negative-log-evidence
854/// the outer loop minimises.
855pub fn constant_curvature_profiled_reml_scores(
856    formula: &str,
857    data: &Dataset,
858    config: &FitConfig,
859    kappas: &[f64],
860) -> Result<Vec<(f64, f64)>, WorkflowError> {
861    let mat = materialize(formula, data, config)?;
862    let FitRequest::Standard(request) = mat.request else {
863        return Err(WorkflowError::IntegrationFailed {
864            reason: "constant_curvature_profiled_reml_scores: formula did not materialise to a \
865                     standard fit request"
866                .to_string(),
867        });
868    };
869    let term_idx = *crate::fit_orchestration::drivers::constant_curvature_term_indices(&request.spec)
870        .first()
871        .ok_or_else(|| WorkflowError::IntegrationFailed {
872            reason: "constant_curvature_profiled_reml_scores: formula has no constant-curvature \
873                     curv() term"
874                .to_string(),
875        })?;
876    let mut out = Vec::with_capacity(kappas.len());
877    for &kappa in kappas {
878        let score = crate::fit_orchestration::drivers::fixed_kappa_profiled_reml_score(
879            request.data.view(),
880            request.y.view(),
881            request.weights.view(),
882            request.offset.view(),
883            &request.spec,
884            term_idx,
885            kappa,
886            request.family.clone(),
887            &request.options,
888        )
889        .map_err(|e| WorkflowError::IntegrationFailed {
890            reason: format!(
891                "constant_curvature_profiled_reml_scores: fixed-κ fit at κ={kappa} failed: {e}"
892            ),
893        })?;
894        out.push((kappa, score));
895    }
896    Ok(out)
897}
898
899/// Derived dense-kernel cliff: the cascade auto-route fires only once the dense
900/// radial basis the smooth would otherwise use has SATURATED at its center cap
901/// (`default_num_centers == K_MAX`), so the dense `O(n·K² + K³)` kernel solve
902/// can no longer grow resolution with `n` and the streaming cascade's
903/// `O(n·polylog)` is the only path that keeps improving. This is the structural
904/// "past the dense-kernel cliff" condition the issue names — derived from the
905/// dense sizing rule, NOT a magic n constant or a user flag.
906fn past_dense_kernel_cliff(n: usize, d: usize) -> bool {
907    // `default_num_centers` clamps to K_MAX = 2000; equality means the dense
908    // basis is pinned at the cap and cannot densify further with n.
909    const DENSE_CENTER_CAP: usize = 2000;
910    gam_terms::basis::default_num_centers(n, d) >= DENSE_CENTER_CAP
911}
912
913/// Map a Duchon/Matérn smoothness order onto the cascade's Sobolev order,
914/// clamped into the Wendland-(3,1) native window `(d/2, (d+3)/2]` (issue
915/// caveat 1: the multilevel frame can only represent up to `H^{(d+3)/2}`).
916fn cascade_sobolev_order(requested: f64, d: usize) -> f64 {
917    let lo = d as f64 / 2.0;
918    let hi = (d as f64 + 3.0) / 2.0;
919    // Nudge strictly inside the open lower bound when the request lands on it.
920    let eps = 1e-6 * (hi - lo);
921    requested.clamp(lo + eps, hi)
922}
923
924/// Detection seam for the O(n log n) multiresolution residual-cascade fast path
925/// (issue #1032).
926///
927/// This mirrors [`spline_scan_fast_path`] in shape but carries one CRITICAL
928/// difference dictated by the issue: the cascade is **not** the same posterior
929/// as the Duchon/Matérn term it stands in for (a different finite basis — the
930/// multilevel Wendland frame, not the reduced-rank radial kernel). So unlike
931/// the 1-D scan, which silently swaps an identical posterior, this path must
932/// only fire as an explicit alternative estimator on the structural signature
933/// the issue names, never as a transparent replacement. It returns `Some` only
934/// when ALL of the following hold:
935/// - family is Gaussian + identity link (the scattered low-d smooth the
936///   cascade solves);
937/// - none of the exotic-link / constraint / Firth / Kronecker / coefficient-
938///   group / hyperprior machinery is engaged;
939/// - the model is exactly one smooth term — no linear terms, no random
940///   effects, no by-variables;
941/// - that smooth is a scattered radial spatial smooth (`Duchon` or `Matern`)
942///   over `d ∈ {2, 3}` coordinates with no shape constraint;
943/// - the offset is identically zero and every weight is finite and positive;
944/// - `n` is past the derived dense-kernel cliff
945///   ([`past_dense_kernel_cliff`]) — below it the dense radial path is both
946///   exact-posterior and cheap, so there is no reason to change estimators.
947///
948/// The returned [`ResidualCascadeInputs`] carry a unit per-axis metric (the
949/// spec's isotropic radial distance); the quasi-uniformity guard inside
950/// [`gam_solve::residual_cascade::fit_residual_cascade`] (issue caveat 2)
951/// is the no-regression gate that refuses the iterative solve — and forces the
952/// caller back to the dense path — when a near-degenerate metric would break
953/// the BPX iteration bound.
954pub fn residual_cascade_fast_path(
955    request: &StandardFitRequest<'_>,
956) -> Option<ResidualCascadeInputs> {
957    if !request.family.is_gaussian_identity() {
958        return None;
959    }
960    if request.wiggle.is_some()
961        || request.latent_coord.is_some()
962        || !request.coefficient_groups.is_empty()
963        || !request.penalty_block_gamma_priors.is_empty()
964    {
965        return None;
966    }
967    let options = &request.options;
968    if options.latent_cloglog.is_some()
969        || options.mixture_link.is_some()
970        || options.sas_link.is_some()
971        || options.linear_constraints.is_some()
972        || options.adaptive_regularization.is_some()
973        || options.kronecker_penalty_system.is_some()
974        || options.kronecker_factored.is_some()
975        || options.firth_bias_reduction
976        || !options.nullspace_dims.is_empty()
977    {
978        return None;
979    }
980    let spec = &request.spec;
981    if !spec.linear_terms.is_empty()
982        || !spec.random_effect_terms.is_empty()
983        || spec.smooth_terms.len() != 1
984    {
985        return None;
986    }
987    let term = &spec.smooth_terms[0];
988    if !matches!(term.shape, gam_terms::smooth::ShapeConstraint::None)
989        || term.joint_null_rotation.is_some()
990    {
991        return None;
992    }
993    // Only scattered radial spatial smooths (Duchon / Matérn) over 2–3 axes.
994    // The Duchon spectral power `p + s` and the Matérn order set the requested
995    // Sobolev smoothness; both clamp into the Wendland native window.
996    let (feature_cols, requested_s) = match &term.basis {
997        gam_terms::smooth::SmoothBasisSpec::Duchon {
998            feature_cols, spec, ..
999        } => {
1000            // Pure-Duchon native order is `p + s` (kernel exponent 2(p+s)−d);
1001            // the multilevel frame targets the same continuum smoothness. `p`
1002            // is the polynomial nullspace degree, `s` the spectral power.
1003            let p = match spec.nullspace_order {
1004                gam_terms::basis::DuchonNullspaceOrder::Zero => 0.0,
1005                gam_terms::basis::DuchonNullspaceOrder::Linear => 1.0,
1006                gam_terms::basis::DuchonNullspaceOrder::Degree(k) => k as f64,
1007            };
1008            (feature_cols, spec.power + p)
1009        }
1010        gam_terms::smooth::SmoothBasisSpec::Matern {
1011            feature_cols, spec, ..
1012        } => {
1013            // Matérn smoothness ν sets native Sobolev order ν + d/2; the cascade
1014            // frame represents up to (d+3)/2, so the clamp below applies the
1015            // ceiling. (d is known just below from feature_cols.)
1016            let nu = spec.nu.half_integer_value();
1017            (feature_cols, nu + feature_cols.len() as f64 / 2.0)
1018        }
1019        _ => return None,
1020    };
1021    let d = feature_cols.len();
1022    if !(2..=3).contains(&d) {
1023        return None;
1024    }
1025    if request.offset.iter().any(|&v| v != 0.0) {
1026        return None;
1027    }
1028    if request.weights.iter().any(|&v| !(v.is_finite() && v > 0.0)) {
1029        return None;
1030    }
1031    let n = request.y.len();
1032    if n != request.data.nrows() || feature_cols.iter().any(|&c| c >= request.data.ncols()) {
1033        return None;
1034    }
1035    if !past_dense_kernel_cliff(n, d) {
1036        return None;
1037    }
1038    let coords: Vec<Vec<f64>> = feature_cols
1039        .iter()
1040        .map(|&c| request.data.column(c).iter().copied().collect())
1041        .collect();
1042    let y: Vec<f64> = request.y.iter().copied().collect();
1043    let w: Vec<f64> = request.weights.iter().copied().collect();
1044    if coords
1045        .iter()
1046        .any(|axis| axis.iter().any(|v| !v.is_finite()))
1047        || y.iter().any(|v| !v.is_finite())
1048    {
1049        return None;
1050    }
1051    let metric = vec![1.0_f64; d];
1052    let sobolev_s = cascade_sobolev_order(requested_s, d);
1053    Some(ResidualCascadeInputs {
1054        coords,
1055        y,
1056        w,
1057        metric,
1058        sobolev_s,
1059    })
1060}
1061
1062/// Formula-level library entry for the O(n log n) residual-cascade fast path
1063/// (issue #1032).
1064///
1065/// Materializes the formula exactly like [`fit_from_formula`], runs the
1066/// [`residual_cascade_fast_path`] detection, and — when it fires AND the
1067/// quasi-uniformity guard inside the cascade certifies the metric — returns the
1068/// certified [`ResidualCascadeFit`](gam_solve::residual_cascade::ResidualCascadeFit).
1069/// `Ok(None)` means EITHER the model is not the cascade-eligible shape OR the
1070/// quasi-uniformity guard rejected the metric; in both cases the caller falls
1071/// back to the dense [`fit_from_formula`] path (the cascade is a different
1072/// posterior, so the fallback is a genuine estimator choice, never a silent
1073/// swap). This keeps every persistence-bearing consumer on the dense fit until
1074/// the cascade payload schema lands.
1075pub fn fit_residual_cascade_from_formula(
1076    formula: &str,
1077    data: &Dataset,
1078    config: &FitConfig,
1079) -> Result<Option<gam_solve::residual_cascade::ResidualCascadeFit>, WorkflowError> {
1080    let mat = materialize(formula, data, config)?;
1081    let FitRequest::Standard(request) = mat.request else {
1082        return Ok(None);
1083    };
1084    let Some(inputs) = residual_cascade_fast_path(&request) else {
1085        return Ok(None);
1086    };
1087    let coord_refs: Vec<&[f64]> = inputs.coords.iter().map(Vec::as_slice).collect();
1088    match gam_solve::residual_cascade::fit_residual_cascade(
1089        &coord_refs,
1090        &inputs.y,
1091        &inputs.w,
1092        &inputs.metric,
1093        inputs.sobolev_s,
1094    ) {
1095        Ok(fit) => Ok(Some(fit)),
1096        // The quasi-uniformity guard (caveat 2) and any degenerate-design
1097        // signal both surface as a build/solve error; treat them as "not
1098        // cascade-eligible" so the caller falls back to the dense kernel path
1099        // rather than failing the fit outright.
1100        Err(_) => Ok(None),
1101    }
1102}
1103
1104/// Parse a formula, resolve it against a dataset, and produce a ready-to-fit `FitRequest`.
1105pub fn materialize<'a>(
1106    formula: &str,
1107    data: &'a Dataset,
1108    config: &FitConfig,
1109) -> Result<MaterializedModel<'a>, WorkflowError> {
1110    gam_gpu::configure_global_policy(config.gpu_policy);
1111    let parsed = parse_formula(formula)?;
1112    let col_map = data.column_map();
1113
1114    if let Some((left_col, right_col, event_col)) = parse_surv_interval_response(&parsed.response)?
1115    {
1116        if config.transformation_normal {
1117            return Err(WorkflowError::InvalidConfig {
1118                reason:
1119                    "transformation_normal cannot be combined with a SurvInterval(...) response"
1120                        .to_string(),
1121            });
1122        }
1123        // Interval censoring `T ∈ (L, R]` is only defined for the latent
1124        // hazard-window survival likelihood, whose kernel carries the
1125        // `log[S(L) − S(R)]` interval contribution. Route the left boundary `L`
1126        // through the standard exit channel and the right boundary `R` through
1127        // the dedicated interval-right channel; `event_col` distinguishes
1128        // bracketed (interval) rows from right-censored rows beyond the last
1129        // inspection (which carry an infinite/sentinel `R`).
1130        materialize_survival(
1131            &parsed,
1132            data,
1133            &col_map,
1134            config,
1135            None,
1136            &left_col,
1137            &event_col,
1138            Some(&right_col),
1139        )
1140    } else if let Some((entry_col, exit_col, event_col)) = parse_surv_response(&parsed.response)? {
1141        if config.transformation_normal {
1142            return Err(WorkflowError::InvalidConfig {
1143                reason: "transformation_normal cannot be combined with a Surv(...) response"
1144                    .to_string(),
1145            });
1146        }
1147        // `materialize_*` now return `WorkflowError` directly so the typed
1148        // `ColumnNotFound` payload (and any future variant-typed leaf
1149        // errors) survive the dispatcher hop instead of being flattened
1150        // into `IntegrationFailed { reason: String }`.
1151        materialize_survival(
1152            &parsed,
1153            data,
1154            &col_map,
1155            config,
1156            entry_col.as_deref(),
1157            &exit_col,
1158            &event_col,
1159            None,
1160        )
1161    } else {
1162        // Non-survival response: `timewiggle(...)` and `survmodel(...)` are
1163        // structurally meaningless (there is no baseline hazard / time axis to
1164        // wiggle and no survival likelihood to configure). They are parsed into
1165        // `ParsedFormula` but consumed *only* by `materialize_survival`; without
1166        // this guard every non-survival materializer below would silently drop
1167        // them, fitting an ordinary GAM while the user believes they requested a
1168        // time-varying / survival model (#371). Reject here — the single
1169        // chokepoint for all non-survival paths — mirroring the symmetric
1170        // auxiliary-formula rejection in `validate_auxiliary_formula_controls`.
1171        reject_survival_only_terms_for_nonsurvival(&parsed)?;
1172        // Symmetrically, the `config.survival_likelihood` *knob* selects a
1173        // survival likelihood mode read only by `materialize_survival`. On this
1174        // non-survival branch a non-default value (e.g. "weibull") would be
1175        // discarded and the fit would silently degrade to an ordinary GAM
1176        // (#1767). Reject it at the same chokepoint.
1177        reject_survival_likelihood_for_nonsurvival(config)?;
1178        if config.transformation_normal {
1179            // Issue #789A: a Bernoulli marginal-slope request with
1180            // `transformation_normal=true` used to dispatch as a CTN fit while
1181            // retaining marginal-slope controls, leaving the transformation path
1182            // in a non-advancing loop. CTN score calibration now uses the
1183            // explicit `ctn_stage1` recipe instead, so the legacy boolean is a
1184            // hard configuration error for marginal-slope requests.
1185            reject_marginal_slope_controls_for_transformation_normal(config)?;
1186            if config.noise_formula.is_some() {
1187                return Err(WorkflowError::InvalidConfig {
1188                    reason: "transformation_normal cannot be combined with noise_formula"
1189                        .to_string(),
1190                });
1191            }
1192            materialize_transformation_normal(&parsed, data, &col_map, config)
1193        } else if config.logslope_formula.is_some() || config.z_column.is_some() {
1194            materialize_bernoulli_marginal_slope(&parsed, data, &col_map, config)
1195        } else if config.noise_formula.is_some() {
1196            materialize_location_scale(&parsed, data, &col_map, config)
1197        } else {
1198            materialize_standard(&parsed, data, &col_map, config)
1199        }
1200    }
1201}
1202
1203#[cfg(test)]
1204mod sz_factor_smooth_recovery_tests {
1205    // `super::*` brings in `Dataset` (= gam_data::EncodedDataset), `FitConfig`,
1206    // `FitResult`, `StandardFitResult`, and `fit_from_formula`.
1207    use super::*;
1208
1209    const NOISE_SD: f64 = 0.20;
1210    const N: usize = 4000;
1211    const N_GROUPS: usize = 4;
1212
1213    /// A simple deterministic LCG so the dataset is reproducible without pulling
1214    /// an RNG dependency into the test.
1215    struct Lcg(u64);
1216    impl Lcg {
1217        fn next_u64(&mut self) -> u64 {
1218            // Numerical Recipes LCG constants.
1219            self.0 = self.0.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
1220            self.0
1221        }
1222        /// Uniform in [0, 1).
1223        fn unif(&mut self) -> f64 {
1224            (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64
1225        }
1226        /// Standard normal via Box–Muller (one of the pair).
1227        fn normal(&mut self) -> f64 {
1228            let u1 = (self.unif()).max(1e-12);
1229            let u2 = self.unif();
1230            (-2.0 * u1.ln()).sqrt() * (std::f64::consts::TAU * u2).cos()
1231        }
1232    }
1233
1234    /// Data drawn from EXACTLY the `sz` model class: a shared smooth `f0(x)` plus
1235    /// zero-sum per-group deviations `d_g(x)` (phase-shifted sinusoids whose
1236    /// cross-group mean is removed at every `x`), plus observation noise. This
1237    /// mirrors the (blocked) Python bug-hunt test `tests/bug_hunt_sz_factor_
1238    /// smooth_underfits_own_model_class_test.py`.
1239    ///
1240    /// Written to a CSV and loaded through the real `load_dataset_projected`
1241    /// inferer so the grouping column `g` (string levels) is encoded as a genuine
1242    /// categorical exactly as production does — hand-built `EncodedDataset`s do
1243    /// not carry the categorical level map the factor-smooth level resolver needs.
1244    fn sz_class_dataset() -> (Dataset, tempfile::TempDir) {
1245        let mut rng = Lcg(0x5326_2026_0628_1605);
1246        let phases: Vec<f64> = (0..N_GROUPS)
1247            .map(|k| 1.2 * k as f64 / (N_GROUPS as f64 - 1.0))
1248            .collect();
1249        let deviations = |xi: f64| -> Vec<f64> {
1250            let vals: Vec<f64> = phases
1251                .iter()
1252                .map(|p| 0.6 * (std::f64::consts::TAU * xi + std::f64::consts::TAU * p).sin())
1253                .collect();
1254            let mean = vals.iter().sum::<f64>() / vals.len() as f64;
1255            vals.iter().map(|v| v - mean).collect()
1256        };
1257
1258        let mut csv = String::from("y,x,g\n");
1259        for _ in 0..N {
1260            let x = rng.unif();
1261            // Use the HIGH bits (via `unif`) for the group draw — an LCG's low
1262            // bits have a tiny period and would collapse `% N_GROUPS` to a near
1263            // constant.
1264            let g = ((rng.unif() * N_GROUPS as f64) as usize).min(N_GROUPS - 1);
1265            let f0 = (std::f64::consts::TAU * x).sin();
1266            let mu = f0 + deviations(x)[g];
1267            let y = mu + NOISE_SD * rng.normal();
1268            csv.push_str(&format!("{y},{x},g{g}\n"));
1269        }
1270        let td = tempfile::tempdir().expect("tempdir");
1271        let path = td.path().join("sz_class.csv");
1272        std::fs::write(&path, csv).expect("write sz-class csv");
1273        // Force `g` into a categorical role exactly as the formula intends so the
1274        // factor-smooth level resolver sees all `N_GROUPS` distinct levels.
1275        let mut roles = std::collections::HashSet::new();
1276        roles.insert("g");
1277        let data = gam_data::load_dataset_projected_with_categorical_roles(
1278            &path,
1279            &["y".to_string(), "x".to_string(), "g".to_string()],
1280            &roles,
1281        )
1282        .expect("load sz-class dataset");
1283        (data, td)
1284    }
1285
1286    fn gaussian_config() -> FitConfig {
1287        FitConfig { family: Some("gaussian".to_string()), ..FitConfig::default() }
1288    }
1289
1290    /// In-sample residual sd of a fitted standard GAM: `sd(y − Xβ̂)`.
1291    fn residual_sd(fit: &StandardFitResult, data: &Dataset) -> f64 {
1292        let beta = &fit.fit.beta;
1293        let design = &fit.design.design;
1294        let n = design.nrows();
1295        assert_eq!(design.ncols(), beta.len(), "design/beta width mismatch");
1296        let mut fitted = vec![0.0f64; n];
1297        // `try_row_chunk` materializes contiguous row blocks of whatever design
1298        // storage the fit used (dense or block-lazy) — robust to the storage kind.
1299        const CHUNK: usize = 512;
1300        let mut start = 0usize;
1301        while start < n {
1302            let end = (start + CHUNK).min(n);
1303            let block = design
1304                .try_row_chunk(start..end)
1305                .expect("materialize design row chunk");
1306            for (r, row) in block.rows().into_iter().enumerate() {
1307                let mut acc = 0.0;
1308                for (c, &xv) in row.iter().enumerate() {
1309                    acc += xv * beta[c];
1310                }
1311                fitted[start + r] = acc;
1312            }
1313            start = end;
1314        }
1315        let y = data.values.column(0);
1316        let resid: Vec<f64> = y.iter().zip(fitted.iter()).map(|(&yi, &fi)| yi - fi).collect();
1317        let mean = resid.iter().sum::<f64>() / resid.len() as f64;
1318        let var = resid.iter().map(|r| (r - mean).powi(2)).sum::<f64>() / resid.len() as f64;
1319        var.sqrt()
1320    }
1321
1322    fn fit_standard(formula: &str, data: &Dataset) -> StandardFitResult {
1323        match fit_from_formula(formula, data, &gaussian_config())
1324            .unwrap_or_else(|e| panic!("fit `{formula}` failed: {e:?}"))
1325        {
1326            FitResult::Standard(r) => r,
1327            other => panic!("expected Standard fit for `{formula}`, got a different variant: {}",
1328                std::any::type_name_of_val(&other)),
1329        }
1330    }
1331
1332    /// #1605 (gold standard, end-to-end REML fit): the sum-to-zero factor smooth
1333    /// `s(x) + s(g, x, bs="sz")` must RECOVER data drawn from its own model class
1334    /// to the observation-noise floor, exactly as the strictly-more-general
1335    /// `s(x, g, bs="fs")` superset provably does.
1336    ///
1337    /// The recovery gap (`sz` resid ≈ 0.43 ≈ 2.1× the 0.20 floor while `fs`
1338    /// reaches the floor) was closed by THREE mgcv-faithful corrections, each
1339    /// necessary, that this end-to-end fit jointly exercises:
1340    ///   1. marginal basis (baef17e): cr → curvature-capable B-spline, so a
1341    ///      deviation with non-zero boundary curvature is representable;
1342    ///   2. ownership/overlap residualization (b49bb5c): the `sz` deviation is
1343    ///      sum-to-zero ACROSS the grouping factor, hence orthogonal to a
1344    ///      factor-independent owner like the shared `s(x)`. Residualizing it
1345    ///      against `s(x)`'s realized span (the #978 chart) collapsed every
1346    ///      group's curve to a flat per-group contrast; skipping that ownership
1347    ///      (same family as the #1276 factor-`by` level gate) restores the curve
1348    ///      shape and stops REML railing the shared `s(x)` wiggliness λ;
1349    ///   3. null-space ridge (this change): the `sz` deviation blocks now carry
1350    ///      the per-null-dimension ridge structure of `fs`, mapped into the
1351    ///      zero-sum contrast space, so the {const, linear} null space is
1352    ///      shrinkable per dimension (the #700/#712/#713 partial-pooling form)
1353    ///      rather than left free — without breaking the zero-sum constraint.
1354    ///
1355    /// This is the gold-standard verification: it drives the real
1356    /// `fit_from_formula` REML λ-selection on data drawn from exactly the `sz`
1357    /// model class and asserts `sz` reaches the floor (and a `fs` control does
1358    /// too). It failed before the fixes and passes after.
1359    #[test]
1360    fn sz_factor_smooth_recovers_its_own_model_class_end_to_end() {
1361        let (data, _td) = sz_class_dataset();
1362
1363        // Control: bs="fs", a strict superset of the sz span, must reach the
1364        // noise floor — proves the data is well-posed and pins the floor.
1365        let fs_fit = fit_standard("y ~ s(x, g, bs='fs')", &data);
1366        let fs_resid = residual_sd(&fs_fit, &data);
1367        assert!(
1368            fs_resid < 1.2 * NOISE_SD,
1369            "control bs='fs' did not reach the noise floor: resid_sd={fs_resid:.4} \
1370             vs noise_sd={NOISE_SD} (data/floor sanity check)",
1371        );
1372
1373        // The documented sz idiom on data drawn from the sz model class.
1374        let sz_fit = fit_standard("y ~ s(x) + s(g, x, bs='sz')", &data);
1375        let sz_resid = residual_sd(&sz_fit, &data);
1376
1377        // A smoother whose span contains the truth, fit at large n, must explain
1378        // the systematic structure and leave ~only observation noise.
1379        assert!(
1380            sz_resid < 1.4 * NOISE_SD,
1381            "bs='sz' under-fits its own model class: resid_sd={sz_resid:.4} \
1382             ({:.2}x the noise floor {NOISE_SD}); the bs='fs' superset reached \
1383             {fs_resid:.4}. The sz fit leaves systematic signal in the residual.",
1384            sz_resid / NOISE_SD,
1385        );
1386
1387        // Comparative guard: sz must not be dramatically worse than the fs
1388        // superset that recovers the same data.
1389        assert!(
1390            sz_resid < 1.5 * fs_resid,
1391            "bs='sz' residual {sz_resid:.4} is {:.2}x the bs='fs' residual \
1392             {fs_resid:.4} on identical sz-class data",
1393            sz_resid / fs_resid,
1394        );
1395    }
1396}