Skip to main content

gam_terms/
term_builder.rs

1//! Term construction: bridge from parsed formula terms to `TermCollectionSpec`.
2//!
3//! This module takes the AST produced by `inference::formula_dsl` and a loaded
4//! dataset, resolves column references, infers knot counts and center strategies,
5//! and produces a `TermCollectionSpec` ready for `build_term_collection_design`.
6
7use std::collections::{BTreeMap, BTreeSet, HashMap};
8use std::path::PathBuf;
9
10use ndarray::{Array2, ArrayView1};
11
12use crate::basis::{
13    BSplineBasisSpec, BSplineBoundaryConditions, BSplineEndpointBoundaryCondition,
14    BSplineIdentifiability, BSplineKnotSpec, CenterCountRequest, CenterStrategy,
15    ConstantCurvatureBasisSpec, ConstantCurvatureIdentifiability, DuchonBasisSpec,
16    DuchonNullspaceOrder, DuchonOperatorPenaltySpec, MaternBasisSpec, MaternIdentifiability,
17    MaternNu, MeasureJetBasisSpec, MeasureJetIdentifiability, OneDimensionalBoundary,
18    SpatialIdentifiability, SphereMethod, SphereWahbaKernel, SphericalSplineBasisSpec,
19    SphericalSplineIdentifiability, ThinPlateBasisSpec, auto_spatial_center_strategy,
20    default_num_centers, default_spatial_center_strategy, default_spherical_harmonic_degree,
21    plan_spatial_basis, thin_plate_penalty_order,
22};
23use crate::inference::formula_dsl::{
24    ParsedTerm, SmoothKind, option_bool, option_f64, option_f64_strict, option_usize,
25    option_usize_any, option_usize_any_strict, option_usize_strict, strip_quotes,
26};
27use crate::smooth::{
28    ByVarKind, FactorSmoothFlavour, FactorSmoothSpec, LinearCoefficientGeometry, LinearTermSpec,
29    RandomEffectTermSpec, ShapeConstraint, SmoothBasisSpec, SmoothTermSpec,
30    TensorBSplineIdentifiability, TensorBSplinePenaltyDecomposition, TensorBSplineSpec,
31    TermCollectionSpec,
32};
33use gam_problem::types::ColIdx;
34use gam_data::{ColumnKindTag, DataError, EncodedDataset as Dataset};
35use gam_runtime::resource::ResourcePolicy;
36
/// B-spline degree used when a smooth carries no `degree=` option.
///
/// Cubic (degree 3) is the usual GAM choice: it gives C² continuity while
/// keeping the knot count low.
const DEFAULT_BSPLINE_DEGREE: usize = 3;

/// Difference-penalty order used when a smooth carries neither
/// `penalty_order=` nor its alias `m=`.
///
/// Order 2 penalizes curvature, which is the standard P-spline convention.
const DEFAULT_PENALTY_ORDER: usize = 2;

/// Basis dimension used by default for one-dimensional cyclic cubic P-splines.
///
/// A periodic smooth has no free endpoints to spend coefficients on, so it
/// should not pick up the larger knot ceiling used for open B-splines. This is
/// only the fallback: a richer periodic space can still be requested with `k=`.
const CYCLIC_DEFAULT_BASIS_DIM: usize = 12;

/// Shared-marginal basis dimension used by default for `bs="fs"` / `bs="sz"`
/// factor smooths; mirrors mgcv's factor-smooth default of `k=10`.
///
/// One marginal is shared by every level of the factor, so a modest basis
/// captures the shared signal without chasing each group's within-group noise
/// (gam#903). An explicit `k` / `basis_dim` takes precedence over this value.
const FACTOR_SMOOTH_DEFAULT_BASIS_DIM: usize = 10;

/// Row-chunk size used by the out-of-core PCA-basis smooth when no
/// `chunk_size=` option is given.
///
/// The design is streamed in row blocks of this size so that peak memory does
/// not grow with the number of rows in the dataset.
const DEFAULT_PCA_CHUNK_SIZE: usize = 4096;
65
66// ---------------------------------------------------------------------------
67// Typed errors
68// ---------------------------------------------------------------------------
69
70/// Typed errors emitted by term-builder helpers. `Display` reproduces the exact
71/// pre-refactor `format!(...)` text byte-for-byte, so callers that string-match
72/// on the message (tests, log assertions) keep working unchanged. Public-API
73/// functions still return `Result<_, String>` and use `.to_string()` shims at
74/// their boundary to stay compatible with callers in protected modules.
75#[derive(Clone, Debug)]
76pub enum TermBuilderError {
77    /// Column-resolution / column-kind lookup failures whose context is purely
78    /// internal (column-kind table out-of-sync, alias map missing an entry,
79    /// etc.). User-facing "this formula references a column that doesn't
80    /// exist" diagnostics use the dedicated `ColumnNotFound` variant so the
81    /// FFI boundary can lift the structured payload into a Python
82    /// `ColumnNotFoundError` without parsing prose.
83    MissingColumn { reason: String },
84    /// A formula referenced a column that is not present in the input data.
85    /// Mirrors `DataError::ColumnNotFound` field-for-field so the conversion
86    /// across module boundaries is a pure data move (no re-derivation, no
87    /// string re-parsing). Public callers see byte-identical `Display`
88    /// output to the legacy `missing_column_message` text.
89    ColumnNotFound {
90        name: String,
91        role: Option<String>,
92        available: Vec<String>,
93        similar: Vec<String>,
94        tsv_hint: bool,
95    },
96    /// User-specified configuration is internally inconsistent (e.g. too few
97    /// variables for a smooth type, conflicting size options, requested basis
98    /// dimension below the polynomial nullspace).
99    IncompatibleConfig { reason: String },
100    /// Option parsing failure: malformed numeric expression, unknown option
101    /// key, out-of-range integer, list-length mismatch, etc.
102    InvalidOption { reason: String },
103    /// User requested a feature that is intentionally not supported (unknown
104    /// smooth type / method / kernel / identifiability, non-zero anchor,
105    /// internal-only token, etc.).
106    UnsupportedFeature { reason: String },
107    /// Input data is degenerate for the requested term (constant column,
108    /// non-finite categorical entries, ...).
109    DegenerateData { reason: String },
110    /// Term-collection-stage formula error — a node that the caller was
111    /// supposed to resolve upstream reached the builder.
112    MalformedFormula { reason: String },
113}
114
115impl std::fmt::Display for TermBuilderError {
116    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117        match self {
118            TermBuilderError::MissingColumn { reason }
119            | TermBuilderError::IncompatibleConfig { reason }
120            | TermBuilderError::InvalidOption { reason }
121            | TermBuilderError::UnsupportedFeature { reason }
122            | TermBuilderError::DegenerateData { reason }
123            | TermBuilderError::MalformedFormula { reason } => f.write_str(reason),
124            // Delegate to the canonical `DataError::ColumnNotFound` formatter
125            // so a single source of truth defines the human text. The
126            // intermediate `DataError` constructed here owns its strings only
127            // for the duration of the Display call — no allocation cost
128            // beyond the original payload that this variant already holds.
129            TermBuilderError::ColumnNotFound {
130                name,
131                role,
132                available,
133                similar,
134                tsv_hint,
135            } => {
136                let canonical = DataError::ColumnNotFound {
137                    name: name.clone(),
138                    role: role.clone(),
139                    available: available.clone(),
140                    similar: similar.clone(),
141                    tsv_hint: *tsv_hint,
142                };
143                std::fmt::Display::fmt(&canonical, f)
144            }
145        }
146    }
147}
148
149impl From<TermBuilderError> for String {
150    fn from(err: TermBuilderError) -> String {
151        err.to_string()
152    }
153}
154
155/// Catchall lift for the term-builder's internal `Result<_, String>` helpers
156/// (numeric expression parsing, option lookup, boundary-condition parsing,
157/// ...) that flow into `build_termspec` via `?`. Maps to
158/// `IncompatibleConfig`, which is the most appropriate generic bucket for
159/// option/config-style failures — leaf sites that emit structured payloads
160/// (`From<DataError>` for column-not-found) bypass this fallback.
161impl From<String> for TermBuilderError {
162    fn from(reason: String) -> Self {
163        Self::IncompatibleConfig { reason }
164    }
165}
166
167/// Typed lift from data-layer errors. `DataError::ColumnNotFound` becomes
168/// `TermBuilderError::ColumnNotFound` field-for-field — no stringification,
169/// no information loss — so the FFI boundary downstream can dispatch on
170/// the typed variant. Other `DataError` variants degrade into
171/// `MissingColumn` since they describe column-resolution-time failures
172/// without a dedicated structured destination.
173impl From<DataError> for TermBuilderError {
174    fn from(err: DataError) -> Self {
175        match err {
176            DataError::ColumnNotFound {
177                name,
178                role,
179                available,
180                similar,
181                tsv_hint,
182            } => Self::ColumnNotFound {
183                name,
184                role,
185                available,
186                similar,
187                tsv_hint,
188            },
189            DataError::SchemaMismatch { reason }
190            | DataError::ParseError { reason }
191            | DataError::EncodingFailure { reason }
192            | DataError::EmptyInput { reason }
193            | DataError::InvalidValue { reason } => Self::MissingColumn { reason },
194        }
195    }
196}
197
198// Constructor helpers — keep error-site code compact and consistent.
199impl TermBuilderError {
200    #[inline]
201    fn missing_column(reason: impl Into<String>) -> Self {
202        TermBuilderError::MissingColumn {
203            reason: reason.into(),
204        }
205    }
206    #[inline]
207    fn incompatible_config(reason: impl Into<String>) -> Self {
208        TermBuilderError::IncompatibleConfig {
209            reason: reason.into(),
210        }
211    }
212    #[inline]
213    fn invalid_option(reason: impl Into<String>) -> Self {
214        TermBuilderError::InvalidOption {
215            reason: reason.into(),
216        }
217    }
218    #[inline]
219    fn unsupported_feature(reason: impl Into<String>) -> Self {
220        TermBuilderError::UnsupportedFeature {
221            reason: reason.into(),
222        }
223    }
224    #[inline]
225    fn degenerate_data(reason: impl Into<String>) -> Self {
226        TermBuilderError::DegenerateData {
227            reason: reason.into(),
228        }
229    }
230    #[inline]
231    fn malformed_formula(reason: impl Into<String>) -> Self {
232        TermBuilderError::MalformedFormula {
233            reason: reason.into(),
234        }
235    }
236}
237
238// ---------------------------------------------------------------------------
239// Column resolution
240// ---------------------------------------------------------------------------
241
242/// Resolve a bare column name to its index, returning a typed
243/// `DataError::ColumnNotFound` on miss so the FFI boundary can surface a
244/// structured `gamfit.ColumnNotFoundError(column=…, available=…)` rather
245/// than rely on string-classification of human prose. Internal callers that
246/// still flow `Result<_, String>` get byte-identical text via
247/// `From<DataError> for String`.
248pub fn resolve_col(col_map: &HashMap<String, usize>, name: &str) -> Result<usize, DataError> {
249    col_map
250        .get(name)
251        .copied()
252        .ok_or_else(|| DataError::column_not_found(col_map, name, None))
253}
254
255/// Like `resolve_col` but tags the missing-column payload with a role label
256/// (`"response"`, `"entry"`, `"exit"`, `"event"`, `"z"`, `"id"`, …) so the
257/// boundary-side Python exception can disambiguate which formula slot held
258/// the bad reference.
259pub fn resolve_role_col(
260    col_map: &HashMap<String, usize>,
261    name: &str,
262    role: &str,
263) -> Result<usize, DataError> {
264    col_map
265        .get(name)
266        .copied()
267        .ok_or_else(|| DataError::column_not_found(col_map, name, Some(role)))
268}
269
270fn encoded_levels_for_column(ds: &Dataset, col: ColIdx) -> Vec<(u64, String)> {
271    let mut seen = BTreeSet::<u64>::new();
272    for value in ds.values.column(col.get()) {
273        if value.is_finite() {
274            seen.insert(value.to_bits());
275        }
276    }
277    let schema_levels = ds
278        .schema
279        .columns
280        .get(col.get())
281        .map(|column| column.levels.as_slice())
282        .unwrap_or(&[]);
283    seen.into_iter()
284        .enumerate()
285        .map(|(idx, bits)| {
286            let fallback = format!("level{}", idx + 1);
287            let label = schema_levels.get(idx).cloned().unwrap_or(fallback);
288            (bits, label)
289        })
290        .collect()
291}
292
/// Returns a copy of `col_map` in which `alias` also resolves to the index of
/// `target_column`.
///
/// The alias is added only when `target_column` is present in `col_map` and
/// `alias` is not already a key; an existing entry named `alias` is never
/// overwritten. In every other case the result is a plain clone of the input.
/// `col_map` itself is left untouched.
pub fn column_map_with_alias(
    col_map: &HashMap<String, usize>,
    alias: &str,
    target_column: &str,
) -> HashMap<String, usize> {
    let mut with_alias = col_map.clone();
    if !with_alias.contains_key(alias) {
        if let Some(&target_idx) = col_map.get(target_column) {
            with_alias.insert(alias.to_owned(), target_idx);
        }
    }
    with_alias
}
304
305// ---------------------------------------------------------------------------
306// ParsedTerm[] + Dataset → TermCollectionSpec
307// ---------------------------------------------------------------------------
308
309pub fn build_termspec(
310    terms: &[ParsedTerm],
311    ds: &Dataset,
312    col_map: &HashMap<String, usize>,
313    inference_notes: &mut Vec<String>,
314    policy: &ResourcePolicy,
315) -> Result<TermCollectionSpec, TermBuilderError> {
316    let mut linear_terms = Vec::<LinearTermSpec>::new();
317    let mut random_terms = Vec::<RandomEffectTermSpec>::new();
318    let mut smooth_terms = Vec::<SmoothTermSpec>::new();
319    let smooth_coordinate_count = terms
320        .iter()
321        .map(|term| match term {
322            ParsedTerm::Smooth { vars, .. } => vars.len(),
323            _ => 0,
324        })
325        .sum::<usize>();
326
327    for t in terms {
328        match t {
329            ParsedTerm::Linear {
330                name,
331                explicit,
332                coefficient_min,
333                coefficient_max,
334            } => {
335                let col = resolve_col(col_map, name)?;
336                let auto_kind = ds.column_kinds.get(col).copied().ok_or_else(|| {
337                    TermBuilderError::missing_column(format!(
338                        "internal column-kind lookup failed for '{name}'"
339                    ))
340                    .to_string()
341                })?;
342                if *explicit {
343                    linear_terms.push(LinearTermSpec {
344                        name: name.clone(),
345                        feature_col: col,
346                        feature_cols: vec![col],
347                        categorical_levels: vec![],
348                        // Parametric linear terms are unpenalized by default
349                        // (MLE, matching mgcv/glm); see #749.
350                        double_penalty: false,
351                        coefficient_geometry: LinearCoefficientGeometry::Unconstrained,
352                        coefficient_min: *coefficient_min,
353                        coefficient_max: *coefficient_max,
354                    });
355                } else {
356                    match auto_kind {
357                        ColumnKindTag::Continuous | ColumnKindTag::Binary => {
358                            linear_terms.push(LinearTermSpec {
359                                name: name.clone(),
360                                feature_col: col,
361                                feature_cols: vec![col],
362                                categorical_levels: vec![],
363                                // Unpenalized parametric effect by default (#749).
364                                double_penalty: false,
365                                coefficient_geometry: LinearCoefficientGeometry::Unconstrained,
366                                coefficient_min: *coefficient_min,
367                                coefficient_max: *coefficient_max,
368                            });
369                        }
370                        ColumnKindTag::Categorical => {
371                            if coefficient_min.is_some() || coefficient_max.is_some() {
372                                return Err(TermBuilderError::incompatible_config(format!(
373                                    "coefficient constraints are not supported for categorical auto-random-effect term '{name}'; use group({name}) or an unconstrained numeric term"
374                                )));
375                            }
376                            random_terms.push(RandomEffectTermSpec {
377                                name: name.clone(),
378                                feature_col: col,
379                                drop_first_level: false,
380                                penalized: true,
381                                frozen_levels: None,
382                            });
383                        }
384                    }
385                }
386            }
387            ParsedTerm::BoundedLinear {
388                name,
389                min,
390                max,
391                prior,
392            } => {
393                let col = resolve_col(col_map, name)?;
394                let auto_kind = ds.column_kinds.get(col).copied().ok_or_else(|| {
395                    TermBuilderError::missing_column(format!(
396                        "internal column-kind lookup failed for '{name}'"
397                    ))
398                    .to_string()
399                })?;
400                if !matches!(auto_kind, ColumnKindTag::Continuous | ColumnKindTag::Binary) {
401                    return Err(TermBuilderError::incompatible_config(format!(
402                        "bounded() currently supports only numeric columns, got categorical '{name}'"
403                    )));
404                }
405                linear_terms.push(LinearTermSpec {
406                    name: name.clone(),
407                    feature_col: col,
408                    feature_cols: vec![col],
409                    categorical_levels: vec![],
410                    double_penalty: false,
411                    coefficient_geometry: LinearCoefficientGeometry::Bounded {
412                        min: *min,
413                        max: *max,
414                        prior: prior.clone(),
415                    },
416                    coefficient_min: None,
417                    coefficient_max: None,
418                });
419            }
420            ParsedTerm::RandomEffect { name } => {
421                let col = resolve_col(col_map, name)?;
422                random_terms.push(RandomEffectTermSpec {
423                    name: name.clone(),
424                    feature_col: col,
425                    drop_first_level: false,
426                    penalized: true,
427                    frozen_levels: None,
428                });
429            }
430            ParsedTerm::Smooth {
431                label,
432                vars,
433                kind,
434                options,
435            } => {
436                let smooth_vars = vars.clone();
437                let by_name = options.get("by").cloned();
438                // `bs="sz"` (sum-to-zero), like `bs="fs"`/`bs="re"`, is a
439                // factor-smooth family handled natively by `build_smooth_basis`'s
440                // fs/sz/re path: it detects the categorical factor among the
441                // variables and emits a `SmoothBasisSpec::FactorSmooth { Sz }`
442                // with the correct single-penalty marginal and modest default
443                // basis. Route sz straight through `build_smooth_basis` rather
444                // than intercepting it into a legacy `FactorSumToZero` envelope
445                // here (which left `sz(fac, x)` mis-typed as `FactorSumToZero`
446                // instead of the expected `FactorSmooth { Sz }`).
447                let cols = smooth_vars
448                    .iter()
449                    .map(|v| resolve_col(col_map, v))
450                    .collect::<Result<Vec<_>, _>>()?;
451                let mut inner_options = options.clone();
452                inner_options.remove("by");
453                // `ordered=` is consumed here (ByVarKind::Factor routing) and
454                // must not propagate to the inner basis builder, which has no
455                // allow-list entry for it and would reject it as an unknown option.
456                inner_options.remove("ordered");
457                // Pop the shape constraint before `build_smooth_basis` runs so
458                // it never reaches the per-kind `validate_known_options`
459                // allow-lists (the constraint is a property of the smooth term,
460                // not of any one basis kind). Basis-incompatible requests still
461                // fail loudly downstream via `shape_supports_basis`.
462                let shape = match inner_options.remove("shape") {
463                    None => ShapeConstraint::None,
464                    Some(raw) => crate::smooth::parse_shape_constraint(&raw)
465                        .map_err(TermBuilderError::invalid_option)?,
466                };
467                let inner_basis = build_smooth_basis(
468                    *kind,
469                    &smooth_vars,
470                    &cols,
471                    &inner_options,
472                    ds,
473                    inference_notes,
474                    policy,
475                    smooth_coordinate_count,
476                )?;
477                if let Some(by_name) = by_name {
478                    let by_col = resolve_col(col_map, &by_name)?;
479                    match ds.column_kinds.get(by_col).copied().ok_or_else(|| {
480                        format!("internal column-kind lookup failed for by variable '{by_name}'")
481                    })? {
482                        ColumnKindTag::Categorical => {
483                            let levels = encoded_levels_for_column(ds, ColIdx::new(by_col));
484                            // A penalized random block for this factor already
485                            // owns its full level offsets when EITHER an explicit
486                            // `group(factor)` appears, OR a *bare* categorical
487                            // `+ factor` does — the latter is auto-promoted to a
488                            // penalized random-effect block (see the
489                            // `ParsedTerm::Linear` / `ColumnKindTag::Categorical`
490                            // arm above, `penalized: true`). Both representations
491                            // carry the same per-level offsets, so #1457: the
492                            // `by=` branch must NOT additionally add its own
493                            // unpenalized treatment-coded main effect, which would
494                            // double-represent the factor (two `g` design blocks +
495                            // a spurious extra smoothing parameter).
496                            let penalized_group_owner_present =
497                                terms.iter().any(|other| match other {
498                                    ParsedTerm::RandomEffect { name } => name == &by_name,
499                                    ParsedTerm::Linear {
500                                        name,
501                                        explicit: false,
502                                        ..
503                                    } if name == &by_name => col_map
504                                        .get(name)
505                                        .and_then(|c| ds.column_kinds.get(*c).copied())
506                                        .map(|kind| matches!(kind, ColumnKindTag::Categorical))
507                                        .unwrap_or(false),
508                                    _ => false,
509                                });
510                            // Add an unpenalized treatment-coded fixed main
511                            // effect for a standalone factor-by smooth, unless
512                            // the same factor already has an explicit
513                            // `group(factor)` term OR a bare categorical `+
514                            // factor` that was auto-promoted to a penalized
515                            // random block (#1457).  In those mixed-model forms
516                            // the penalized random intercept is the coherent
517                            // owner of level offsets; adding a no-pooling fixed
518                            // factor effect would bypass random-effect
519                            // shrinkage and degrade BLUP-style predictions.
520                            if !random_terms.iter().any(|rt| rt.name == by_name)
521                                && !penalized_group_owner_present
522                            {
523                                random_terms.push(RandomEffectTermSpec {
524                                    name: by_name.clone(),
525                                    feature_col: by_col,
526                                    drop_first_level: true,
527                                    penalized: false,
528                                    frozen_levels: None,
529                                });
530                            }
531                            // Route to a single BySmooth::Factor term with
532                            // frozen levels pre-populated from the training data.
533                            // Design building later gates each level into its own
534                            // column block (see build_by_smooth_local in term_specs).
535                            let frozen_levels: Vec<u64> =
536                                levels.iter().map(|(bits, _)| *bits).collect();
537                            smooth_terms.push(SmoothTermSpec {
538                                name: label.clone(),
539                                basis: SmoothBasisSpec::BySmooth {
540                                    smooth: Box::new(inner_basis),
541                                    by_kind: ByVarKind::Factor {
542                                        feature_col: by_col,
543                                        ordered: option_bool(options, "ordered").unwrap_or(false),
544                                        frozen_levels: Some(frozen_levels),
545                                    },
546                                },
547                                shape,
548                                joint_null_rotation: None,
549                            });
550                        }
551                        ColumnKindTag::Binary | ColumnKindTag::Continuous => {
552                            smooth_terms.push(SmoothTermSpec {
553                                name: label.clone(),
554                                basis: SmoothBasisSpec::BySmooth {
555                                    smooth: Box::new(inner_basis),
556                                    by_kind: ByVarKind::Numeric {
557                                        feature_col: by_col,
558                                    },
559                                },
560                                shape,
561                                joint_null_rotation: None,
562                            });
563                        }
564                    }
565                } else {
566                    smooth_terms.push(SmoothTermSpec {
567                        name: label.clone(),
568                        basis: inner_basis,
569                        shape,
570                        joint_null_rotation: None,
571                    });
572                }
573            }
574            ParsedTerm::LinkWiggle { .. }
575            | ParsedTerm::TimeWiggle { .. }
576            | ParsedTerm::LinkConfig { .. }
577            | ParsedTerm::SurvivalConfig { .. } => {
578                // Consumed at formula level, not design terms.
579            }
580            ParsedTerm::LogSlopeSurface { .. } => {
581                return Err(TermBuilderError::malformed_formula(
582                    "logslope(...) declarations must be resolved by the marginal-slope formula path before building a term spec",
583                ));
584            }
585            ParsedTerm::Interaction { vars } => {
586                // A linear `:` interaction realizes one design column equal to
587                // the elementwise product of its operands. Numeric (continuous/
588                // binary) operands multiply directly; a categorical operand is
589                // a factor, so the product is expanded factor-aware: one design
590                // column per surviving cell of the factor(s), each an indicator
591                // `1[factor == level]` gating the numeric product.
592                //
593                // Coding is MARGINALITY-AWARE (gam#1158, gam#1159). A categorical
594                // operand `g` is treatment-coded (its lexicographically first
595                // reference level dropped) ONLY when the lower-order term obtained
596                // by removing `g` from this interaction is also present in the
597                // model — that lower-order term is what makes the dropped level
598                // identifiable, exactly mgcv's marginality rule. When that parent
599                // is ABSENT (the interaction-only form), dropping the reference
600                // level instead pins a group to the reference fit (a rank-deficient
601                // design), so we keep ALL levels (full dummy coding) and rely on a
602                // single intercept cell-drop below for identifiability:
603                //   * `y ~ x:g` with no `x` main effect → "common intercept,
604                //     separate slopes": every group keeps its own x-slope.
605                //   * `y ~ g:h` with no `g`/`h` main effects → the saturated
606                //     cell-means model: full cross of all levels minus one
607                //     reference cell absorbed by the intercept.
608                // When the parents ARE present (`x + x:g`, or `g*h` = `g + h +
609                // g:h`), the historical treatment coding is preserved so those
610                // forms stay correct.
611                //
612                // A main effect for var V is a `Linear`/`BoundedLinear`/
613                // `RandomEffect` ParsedTerm whose referenced name is V (an
614                // auto-detected categorical `Linear` becomes a RandomEffect main
615                // effect; either spelling counts). We only treat such standalone
616                // main-effect terms as parents — not V appearing inside another
617                // interaction.
618                let main_effect_present = |target: &str| -> bool {
619                    terms.iter().any(|other| match other {
620                        ParsedTerm::Linear { name, .. }
621                        | ParsedTerm::BoundedLinear { name, .. }
622                        | ParsedTerm::RandomEffect { name } => name == target,
623                        _ => false,
624                    })
625                };
626                // The lower-order parent of dropping operand `drop_var` from this
627                // interaction is present iff EVERY other operand is a main effect.
628                // For the two cases we care about (`x:g`, `g:h`) the interaction
629                // has two operands, so this reduces to "is the single remaining
630                // operand a main effect"; the general form handles any arity.
631                let parent_present = |drop_var: &str| -> bool {
632                    vars.iter()
633                        .filter(|v| v.as_str() != drop_var)
634                        .all(|v| main_effect_present(v))
635                };
636
637                let mut numeric_cols = Vec::<usize>::new();
638                // Per categorical operand: (var name, col, kept levels, was the
639                // reference level dropped / treatment-coded?).
640                let mut categorical_factors =
641                    Vec::<(String, usize, Vec<(u64, String)>, bool)>::new();
642                for var in vars {
643                    let col = resolve_col(col_map, var)?;
644                    let kind = ds.column_kinds.get(col).copied().ok_or_else(|| {
645                        TermBuilderError::missing_column(format!(
646                            "internal column-kind lookup failed for '{var}'"
647                        ))
648                        .to_string()
649                    })?;
650                    match kind {
651                        ColumnKindTag::Continuous | ColumnKindTag::Binary => numeric_cols.push(col),
652                        ColumnKindTag::Categorical => {
653                            let mut levels = encoded_levels_for_column(ds, ColIdx::new(col));
654                            // Treatment-code (drop the reference level) only when
655                            // the marginal parent that identifies it is present;
656                            // otherwise keep every level (full dummy coding).
657                            let treatment_coded = parent_present(var);
658                            if treatment_coded && levels.len() > 1 {
659                                levels.remove(0);
660                            }
661                            if levels.is_empty() {
662                                return Err(TermBuilderError::incompatible_config(format!(
663                                    "interaction `{}` references categorical column `{var}` with no usable levels",
664                                    vars.join(":")
665                                )));
666                            }
667                            categorical_factors.push((var.clone(), col, levels, treatment_coded));
668                        }
669                    }
670                }
671
672                let label = vars.join(":");
673
674                if categorical_factors.is_empty() {
675                    // Pure numeric `:` interaction — single product column,
676                    // identical to the historical behaviour.
677                    linear_terms.push(LinearTermSpec {
678                        name: label,
679                        feature_col: numeric_cols[0],
680                        feature_cols: numeric_cols,
681                        categorical_levels: vec![],
682                        // Parametric `:` interaction column is unpenalized by
683                        // default, same as any other linear term (#749).
684                        double_penalty: false,
685                        coefficient_geometry: LinearCoefficientGeometry::Unconstrained,
686                        coefficient_min: None,
687                        coefficient_max: None,
688                    });
689                    inference_notes.push(format!(
690                        "wired linear interaction `{}` as product of numeric columns",
691                        vars.join(":")
692                    ));
693                } else {
694                    // Factor-aware expansion: cartesian product over the kept
695                    // levels of every categorical operand. Each cell yields one
696                    // column gating the numeric product (or, with no numeric
697                    // operand, a pure cell indicator).
698                    let mut cells: Vec<Vec<(usize, u64, String)>> = vec![Vec::new()];
699                    for (_var, col, levels, _treatment_coded) in &categorical_factors {
700                        let mut next = Vec::with_capacity(cells.len() * levels.len());
701                        for cell in &cells {
702                            for (bits, level_label) in levels {
703                                let mut extended = cell.clone();
704                                extended.push((*col, *bits, level_label.clone()));
705                                next.push(extended);
706                            }
707                        }
708                        cells = next;
709                    }
710
711                    // Intercept-identifiability cell drop. When the cells are PURE
712                    // INDICATORS (no numeric operand) and at least one factor was
713                    // dummy-coded (kept all its levels), the full set of cell
714                    // columns sums to the all-ones intercept and is rank-deficient
715                    // against it. Drop exactly ONE reference cell — the cell where
716                    // every factor sits at its reference (lexicographically first)
717                    // level — so the remaining saturated cells are identifiable
718                    // (rank n_g*n_h - 1 cells + intercept). With a numeric operand
719                    // the cells gate `x` and sum to `x`, not the intercept, so no
720                    // cell is dropped (the collinearity there is with the absent
721                    // `x` main effect, which is exactly why full coding is right).
722                    let any_dummy_coded = categorical_factors
723                        .iter()
724                        .any(|(_, _, _, treatment_coded)| !*treatment_coded);
725                    if numeric_cols.is_empty() && any_dummy_coded {
726                        // The reference cell pairs each factor's column with the
727                        // bits of its lexicographically-first (index 0) level.
728                        let reference_cell: Vec<(usize, u64)> = categorical_factors
729                            .iter()
730                            .map(|(_, col, _, _)| {
731                                let levels = encoded_levels_for_column(ds, ColIdx::new(*col));
732                                (*col, levels[0].0)
733                            })
734                            .collect();
735                        cells.retain(|cell| {
736                            !reference_cell.iter().all(|(rcol, rbits)| {
737                                cell.iter()
738                                    .any(|(col, bits, _)| col == rcol && bits == rbits)
739                            })
740                        });
741                    }
742
743                    let n_cells = cells.len();
744                    for cell in cells {
745                        let cell_suffix = cell
746                            .iter()
747                            .map(|(_, _, level_label)| level_label.as_str())
748                            .collect::<Vec<_>>()
749                            .join(":");
750                        let categorical_levels =
751                            cell.iter().map(|(col, bits, _)| (*col, *bits)).collect();
752                        // `feature_col` is required to point at a real column;
753                        // use the first numeric operand when present, otherwise
754                        // the first categorical column (its raw value is never
755                        // multiplied — `realized_design_column` starts from ones
756                        // and only gates by the level indicators).
757                        let feature_col = numeric_cols
758                            .first()
759                            .copied()
760                            .unwrap_or(categorical_factors[0].1);
761                        linear_terms.push(LinearTermSpec {
762                            name: format!("{label}:{cell_suffix}"),
763                            feature_col,
764                            feature_cols: numeric_cols.clone(),
765                            categorical_levels,
766                            double_penalty: false,
767                            coefficient_geometry: LinearCoefficientGeometry::Unconstrained,
768                            coefficient_min: None,
769                            coefficient_max: None,
770                        });
771                    }
772                    let all_treatment_coded = !any_dummy_coded;
773                    let coding = if all_treatment_coded {
774                        "treatment-coded"
775                    } else {
776                        "marginality-aware (full dummy / saturated)"
777                    };
778                    inference_notes.push(format!(
779                        "wired factor-aware linear interaction `{}` as {} {} cell column(s)",
780                        vars.join(":"),
781                        n_cells,
782                        coding
783                    ));
784                }
785            }
786        }
787    }
788
789    Ok(TermCollectionSpec {
790        linear_terms,
791        random_effect_terms: random_terms,
792        smooth_terms,
793    })
794}
795
/// Split a list-valued option into its trimmed, non-empty entries.
///
/// The wrapper around the comma-separated body may be the Python/JSON form
/// `[a, b]` or one of mgcv's R-vector forms `c(a, b)` / `C(a, b)` / `(a, b)`.
/// mgcv-style formulas spell per-margin numeric options as `k=c(5,5)` or
/// `period=c(2*pi, pi)`; peeling the R wrapper here keeps those from being
/// split into `["c(5", "5)"]`. A wrapper is only removed when both its opening
/// and closing delimiter are present; otherwise the text is split as-is.
/// Entries keep their original case and are not unquoted.
fn split_list_option(raw: &str) -> Vec<String> {
    let text = raw.trim();
    let bracketed = text
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'));
    let body = match bracketed {
        Some(inner) => inner,
        // First matching R-style opener wins; its closing `)` is mandatory.
        None => ["c(", "C(", "("]
            .iter()
            .find_map(|open| text.strip_prefix(*open))
            .and_then(|rest| rest.strip_suffix(')'))
            .unwrap_or(text),
    };

    let mut entries = Vec::new();
    for piece in body.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            entries.push(piece.to_string());
        }
    }
    entries
}
820
/// Evaluate a small multiplicative numeric expression such as `2*pi`,
/// `0.5*tau`, `-pi`, `2π`, or `6.283185307179586`.
///
/// Grammar: one or more factors separated by `*` (ASCII spaces are ignored).
/// Each factor is either a plain `f64` literal or an optional coefficient
/// immediately followed by a constant name: `pi` / `π` (π) or `tau` / `τ` (2π).
/// The ASCII constant names are matched case-insensitively in every position,
/// so `PI`, `2PI` and `2pi` are all accepted. A coefficient may be a bare sign,
/// so `-pi` and `+tau` evaluate to `-π` and `2π` (a natural spelling for
/// `origin=-pi`).
///
/// # Errors
///
/// Returns a message when the expression is the literal `None` (any case),
/// contains an empty factor (e.g. `2**pi`, a trailing `*`, or an empty string),
/// or a factor/coefficient is not a valid `f64` literal.
fn parse_numeric_expr(raw: &str) -> Result<f64, String> {
    // Split `factor` into (coefficient text, constant value) when it ends in a
    // recognised constant name. `str::get` keeps the slice on a char boundary,
    // so multi-byte input can never cause a slicing panic.
    fn split_constant(factor: &str) -> Option<(&str, f64)> {
        const CONSTANTS: [(&str, f64); 4] = [
            ("pi", std::f64::consts::PI),
            ("π", std::f64::consts::PI),
            ("tau", std::f64::consts::TAU),
            ("τ", std::f64::consts::TAU),
        ];
        CONSTANTS.iter().find_map(|&(name, value)| {
            let split = factor.len().checked_sub(name.len())?;
            let tail = factor.get(split..)?;
            if tail.eq_ignore_ascii_case(name) {
                Some((&factor[..split], value))
            } else {
                None
            }
        })
    }

    let normalized = raw.replace(' ', "");
    if normalized.eq_ignore_ascii_case("none") {
        return Err("None is not numeric".to_string());
    }

    let invalid = |err: std::num::ParseFloatError| format!("invalid numeric expression '{raw}': {err}");
    let mut acc = 1.0f64;
    for factor in normalized.split('*') {
        if factor.is_empty() {
            return Err(format!("invalid numeric expression '{raw}'"));
        }
        let value = match split_constant(factor) {
            Some((prefix, constant)) => {
                // An absent coefficient or a bare sign scales the constant by ±1.
                let coefficient = match prefix {
                    "" | "+" => 1.0,
                    "-" => -1.0,
                    text => text.parse::<f64>().map_err(invalid)?,
                };
                coefficient * constant
            }
            None => factor.parse::<f64>().map_err(invalid)?,
        };
        acc *= value;
    }
    Ok(acc)
}
868
869/// Read an endpoint/period option as a numeric *expression* (`2*pi`, `tau`,
870/// `0.5*tau`, `6.283185307179586`, ...) — the same grammar that `period=` and
871/// `origin=` already accept via [`parse_numeric_expr`].
872///
873/// Returns `Ok(None)` when the key is absent, `Ok(Some(v))` when it parses, and
874/// a hard `Err` when the key is *present but unparseable*. The crucial contrast
875/// is with the lenient [`option_f64`], which collapses an unparseable value to
876/// `None` and lets the caller silently substitute the data range — wrapping a
877/// cyclic smooth at the wrong period with no diagnostic (the #815 failure mode).
878fn option_numeric_expr(
879    options: &BTreeMap<String, String>,
880    key: &str,
881) -> Result<Option<f64>, String> {
882    match options.get(key) {
883        None => Ok(None),
884        Some(raw) => parse_numeric_expr(raw)
885            .map(Some)
886            .map_err(|err| format!("option `{key}={raw}` is not a valid numeric value: {err}")),
887    }
888}
889
890fn parse_periods_option(
891    options: &BTreeMap<String, String>,
892    dim: usize,
893) -> Result<Option<Vec<Option<f64>>>, String> {
894    let Some(raw) = options.get("period") else {
895        return Ok(None);
896    };
897    let values = split_list_option(raw);
898    let mut periods = vec![None; dim];
899    if values.len() == 1 && dim == 1 {
900        periods[0] = Some(parse_numeric_expr(&values[0])?);
901    } else {
902        if values.len() != dim {
903            return Err(format!(
904                "period list length {} must match smooth dimension {}",
905                values.len(),
906                dim
907            ));
908        }
909        for (i, v) in values.iter().enumerate() {
910            if v.eq_ignore_ascii_case("none") {
911                continue;
912            }
913            periods[i] = Some(parse_numeric_expr(v)?);
914        }
915    }
916    Ok(Some(periods))
917}
918
919fn parse_periodic_axes_option(
920    options: &BTreeMap<String, String>,
921    dim: usize,
922) -> Result<Option<Vec<Option<f64>>>, String> {
923    let Some(raw_axes) = options.get("periodic") else {
924        return Ok(None);
925    };
926    let mut periods = parse_periods_option(options, dim)?.unwrap_or_else(|| vec![None; dim]);
927    // Scalar boolean form (`periodic=true` / `false`, `yes` / `no`) applies to
928    // every axis — the documented per-axis-flag broadcast (see the doc on
929    // `parse_periodic_axes`, the tensor sibling that already accepts it). A
930    // 1-D `duchon(x, periodic=true)` lands here: the cyclic *domain* is then
931    // resolved from the data range by `parse_cyclic_boundary` (the 1-D builder
932    // consults `boundary` first), so a finite explicit period is NOT required —
933    // we only need to NOT mis-read "true" as an axis index (#1074). `false`
934    // means no axis is periodic.
935    let lowered = raw_axes.trim().to_ascii_lowercase();
936    match lowered.as_str() {
937        "true" | "yes" | "y" => return Ok(Some(periods)),
938        // `false` means NO axis is periodic. Return `None` — NOT
939        // `Some(vec![None; dim])` — because the radial 1-D consumer treats a
940        // `Some([None])` as "periodicity requested, derive the wrap period from
941        // the data range" (see the Duchon builder arm below, which back-fills
942        // `axes[0] = data_span` for a lone `None`) and the 1-D builder routes on
943        // `spec.periodic.is_some()`. Emitting `Some([None])` here therefore
944        // silently produced a *periodic* smooth for an explicit `periodic=false`
945        // — the exact regression this arm now avoids, matching the bracketed
946        // `[false]` form handled by the per-axis boolean block below.
947        "false" | "no" | "n" => return Ok(None),
948        _ => {}
949    }
950    let axes = split_list_option(raw_axes);
951    if axes.is_empty() {
952        return Ok(Some(periods));
953    }
954
955    // Boolean forms `periodic=true` / `periodic=[true, false, ...]`, mirroring
956    // `parse_tensor_periodic_axes`. The radial 1-D builders (`duchon`/`tps`/
957    // `matern`) intentionally DERIVE the wrap period from the closed center
958    // lattice when none is supplied (`prepare_periodic_duchon_centers_1d_with_period`,
959    // gam#580: `None => span`), so a boolean-selected periodic axis legitimately
960    // omits `period`. Without this branch, `duchon(x, periodic=true)`-style
961    // radial formulas failed with the misleading "invalid periodic axis 'true'".
962    let is_bool = |t: &str| {
963        matches!(
964            t.to_ascii_lowercase().as_str(),
965            "true" | "yes" | "y" | "false" | "no" | "n"
966        )
967    };
968    let is_truthy = |t: &str| matches!(t.to_ascii_lowercase().as_str(), "true" | "yes" | "y");
969
970    // Scalar boolean: `periodic=true` / `periodic=false`.
971    if axes.len() == 1 && is_bool(&axes[0]) {
972        if !is_truthy(&axes[0]) {
973            // Non-periodic: return None so the 1-D builder (which routes on
974            // `spec.periodic.is_some()`) does NOT take the periodic path.
975            return Ok(None);
976        }
977        // Every axis periodic; honor any explicit per-axis period, else leave
978        // `None` for the caller (formula arm) / builder to derive the span.
979        return Ok(Some(periods));
980    }
981
982    // Per-axis boolean list: `periodic=[true, false, ...]` (length must match dim).
983    if axes.iter().all(|a| is_bool(a)) {
984        if axes.len() != dim {
985            return Err(format!(
986                "periodic flag list length {} must match smooth dimension {dim}",
987                axes.len()
988            ));
989        }
990        if !axes.iter().any(|a| is_truthy(a)) {
991            return Ok(None);
992        }
993        for (i, a) in axes.iter().enumerate() {
994            if !is_truthy(a) {
995                periods[i] = None;
996            }
997        }
998        return Ok(Some(periods));
999    }
1000
1001    // Index-list form: `periodic=[0, 2]`. Each listed axis must carry an
1002    // explicit finite period — an index gives no per-axis span-derive hint.
1003    for a in &axes {
1004        let axis = a
1005            .parse::<usize>()
1006            .map_err(|err| format!("invalid periodic axis '{a}': {err}"))?;
1007        if axis >= dim {
1008            return Err(format!(
1009                "periodic axis {axis} out of range for {dim}D smooth"
1010            ));
1011        }
1012        if periods[axis].is_none() {
1013            return Err(format!(
1014                "periodic axis {axis} requires period[{axis}] to be finite"
1015            ));
1016        }
1017    }
1018    // Axes not listed are non-periodic even if period list has a finite placeholder.
1019    let listed: std::collections::BTreeSet<usize> = axes
1020        .iter()
1021        .filter_map(|a| a.parse::<usize>().ok())
1022        .collect();
1023    for i in 0..dim {
1024        if !listed.contains(&i) {
1025            periods[i] = None;
1026        }
1027    }
1028    Ok(Some(periods))
1029}
1030
1031// ---------------------------------------------------------------------------
1032// Smooth basis spec construction
1033// ---------------------------------------------------------------------------
1034
/// Split a list-valued keyword option into normalized tokens.
///
/// The comma-separated body may be wrapped as the Python/JSON list `[a, b]`,
/// mgcv's R vector `c(a, b)` / `C(a, b)`, or a bare `(a, b)`; mgcv writes
/// per-margin options as `bs=c('tp','tp')` / `m=c(2,2)`, so all wrappers go
/// through the same splitter. A wrapper is only peeled when both delimiters
/// are present. Each token is trimmed, stripped of surrounding double then
/// single quotes, ASCII-lowercased, and dropped if it ends up empty.
fn parse_option_list(raw: &str) -> Vec<String> {
    let text = raw.trim();
    let bracketed = text
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'));
    let body = match bracketed {
        Some(inner) => inner,
        // First matching R-style opener wins; its closing `)` is mandatory.
        None => ["c(", "C(", "("]
            .iter()
            .find_map(|open| text.strip_prefix(*open))
            .and_then(|rest| rest.strip_suffix(')'))
            .unwrap_or(text),
    };

    let mut tokens = Vec::new();
    for piece in body.split(',') {
        let unquoted = piece.trim().trim_matches('"').trim_matches('\'');
        if !unquoted.is_empty() {
            tokens.push(unquoted.to_ascii_lowercase());
        }
    }
    tokens
}
1064
1065fn parse_periodic_axes(
1066    options: &BTreeMap<String, String>,
1067    dim: usize,
1068) -> Result<Vec<bool>, String> {
1069    let mut axes = vec![false; dim];
1070    if let Some(raw) = options.get("periodic").or_else(|| options.get("cyclic")) {
1071        let lowered = raw.trim().to_ascii_lowercase();
1072        match lowered.as_str() {
1073            "true" | "yes" | "y" => {
1074                axes.fill(true);
1075                return Ok(axes);
1076            }
1077            "false" | "no" | "n" => return Ok(axes),
1078            _ => {}
1079        }
1080        for axis_raw in parse_option_list(raw) {
1081            let axis = axis_raw
1082                .parse::<usize>()
1083                .map_err(|err| format!("invalid periodic axis '{axis_raw}': {err}"))?;
1084            if axis >= dim {
1085                return Err(format!(
1086                    "periodic axis {axis} out of range for {dim}D smooth"
1087                ));
1088            }
1089            axes[axis] = true;
1090        }
1091    }
1092    if let Some(raw) = options.get("boundary").or_else(|| options.get("bc")) {
1093        let boundary = parse_option_list(raw);
1094        if boundary.len() == dim {
1095            for (axis, value) in boundary.iter().enumerate() {
1096                if matches!(value.as_str(), "periodic" | "cyclic" | "cc") {
1097                    axes[axis] = true;
1098                }
1099            }
1100        } else if dim == 1
1101            && matches!(
1102                boundary.first().map(String::as_str),
1103                Some("periodic" | "cyclic" | "cc")
1104            )
1105        {
1106            axes[0] = true;
1107        }
1108    }
1109    Ok(axes)
1110}
1111
1112fn parse_optional_numeric_list(
1113    options: &BTreeMap<String, String>,
1114    keys: &[&str],
1115    dim: usize,
1116) -> Result<Vec<Option<f64>>, String> {
1117    let Some(raw) = keys.iter().find_map(|key| options.get(*key)) else {
1118        return Ok(vec![None; dim]);
1119    };
1120    let values = split_list_option(raw);
1121    let mut out = vec![None; dim];
1122    if values.len() == 1 && dim == 1 {
1123        if !values[0].eq_ignore_ascii_case("none") {
1124            out[0] = Some(parse_numeric_expr(&values[0])?);
1125        }
1126        return Ok(out);
1127    }
1128    if values.len() != dim {
1129        return Err(format!(
1130            "numeric option list length {} must match smooth dimension {}",
1131            values.len(),
1132            dim
1133        ));
1134    }
1135    for (i, value) in values.iter().enumerate() {
1136        if !value.eq_ignore_ascii_case("none") {
1137            out[i] = Some(parse_numeric_expr(value)?);
1138        }
1139    }
1140    Ok(out)
1141}
1142
1143fn parse_periods(
1144    options: &BTreeMap<String, String>,
1145    periodic_axes: &[bool],
1146) -> Result<Vec<Option<f64>>, String> {
1147    let dim = periodic_axes.len();
1148    // Broadcast a single-element `period=[v]` onto the lone periodic axis
1149    // of a multi-axis smooth (e.g. `te(th, h, bc=['periodic','natural'],
1150    // period=[2*pi])`): with only one periodic margin, the value can only
1151    // belong there.
1152    let lone_periodic_broadcast = options
1153        .get("period")
1154        .or_else(|| options.get("periods"))
1155        .and_then(|raw| {
1156            let values = split_list_option(raw);
1157            if values.len() != 1 || dim <= 1 {
1158                return None;
1159            }
1160            let mut iter = periodic_axes.iter().enumerate().filter(|(_, p)| **p);
1161            let first = iter.next()?;
1162            if iter.next().is_some() {
1163                return None;
1164            }
1165            Some((first.0, values.into_iter().next().unwrap()))
1166        });
1167    let periods = if let Some((axis, value)) = lone_periodic_broadcast {
1168        let mut out = vec![None; dim];
1169        if !value.eq_ignore_ascii_case("none") {
1170            out[axis] = Some(parse_numeric_expr(&value)?);
1171        }
1172        out
1173    } else {
1174        parse_optional_numeric_list(options, &["period", "periods"], dim)?
1175    };
1176    for (axis, (periodic, period)) in periodic_axes.iter().zip(periods.iter()).enumerate() {
1177        if *periodic
1178            && let Some(value) = period
1179            && (!value.is_finite() || *value <= 0.0)
1180        {
1181            return Err(format!(
1182                "period for periodic axis {axis} must be finite and positive, got {value}"
1183            ));
1184        }
1185    }
1186    Ok(periods)
1187}
1188
1189fn parse_period_origins(
1190    options: &BTreeMap<String, String>,
1191    periodic_axes: &[bool],
1192) -> Result<Vec<Option<f64>>, String> {
1193    parse_optional_numeric_list(
1194        options,
1195        &[
1196            "origin",
1197            "origins",
1198            "period_origin",
1199            "period-origin",
1200            "domain_origin",
1201        ],
1202        periodic_axes.len(),
1203    )
1204}
1205
1206/// Parse a per-axis periodic flag list for tensor smooths. Accepts three forms:
1207/// - `periodic=true` / `periodic=false` (scalar applied to every axis),
1208/// - `periodic=[true, false, ...]` (one flag per axis, length `dim`),
1209/// - `periodic=c(1, 1)` / `c(0, 0)` (a length-`dim` 0/1 mask, mgcv's
1210///   per-margin spelling — distinguished from an axis-index list by the
1211///   repeated 0/1 value), and
1212/// - `periodic=[0, 2, ...]` (axis indices that are periodic; others are not).
1213///
1214/// `boundary=[..., "periodic"/"cyclic"/"cc", ...]` may also flip individual
1215/// axes on; non-matching tokens leave the existing flag unchanged.
1216fn parse_tensor_periodic_axes(
1217    options: &BTreeMap<String, String>,
1218    dim: usize,
1219) -> Result<Vec<bool>, String> {
1220    let mut axes = vec![false; dim];
1221    if let Some(raw) = options.get("periodic").or_else(|| options.get("cyclic")) {
1222        let lowered = raw.trim().to_ascii_lowercase();
1223        match lowered.as_str() {
1224            "true" | "yes" | "y" => {
1225                axes.fill(true);
1226            }
1227            "false" | "no" | "n" => {
1228                // Already false; allow `boundary=` below to flip axes if set.
1229            }
1230            _ => {
1231                let entries = parse_option_list(raw);
1232                let all_bool = !entries.is_empty()
1233                    && entries.iter().all(|v| {
1234                        matches!(
1235                            v.as_str(),
1236                            "true" | "yes" | "y" | "false" | "no" | "n" | "none"
1237                        )
1238                    });
1239                // mgcv writes per-margin flag vectors as `periodic=c(1,1)` /
1240                // `periodic=c(0,0)` — a length-`dim` mask where each entry is a
1241                // 0/1 flag for THAT margin, not an axis index. A bare axis-index
1242                // list (`periodic=[0,1]`, `periodic=[0]`) lists DISTINCT margin
1243                // indices to turn on. The two collide only when the list is all
1244                // 0/1 of length `dim`; disambiguate by the repeated-value
1245                // signature `c(1,1)`/`c(0,0)` (a valid axis-index set never
1246                // repeats an index), which is the canonical mask spelling. This
1247                // is what makes the leading tensor margin honor its periodic flag
1248                // (#1751: `periodic=c(1,1)` previously parsed `1,1` as axis
1249                // indices, marking only axis 1 and dropping axis 0).
1250                let all_zero_one = !entries.is_empty()
1251                    && entries.iter().all(|v| v == "0" || v == "1");
1252                let has_repeat = {
1253                    let mut seen = std::collections::BTreeSet::new();
1254                    !entries.iter().all(|v| seen.insert(v.clone()))
1255                };
1256                let numeric_mask = all_zero_one && entries.len() == dim && has_repeat;
1257                if all_bool || numeric_mask {
1258                    if entries.len() != dim {
1259                        return Err(format!(
1260                            "periodic list length {} must match smooth dimension {}",
1261                            entries.len(),
1262                            dim
1263                        ));
1264                    }
1265                    for (i, v) in entries.iter().enumerate() {
1266                        axes[i] = matches!(v.as_str(), "true" | "yes" | "y" | "1");
1267                    }
1268                } else {
1269                    for axis_raw in entries {
1270                        let axis = axis_raw
1271                            .parse::<usize>()
1272                            .map_err(|err| format!("invalid periodic axis '{axis_raw}': {err}"))?;
1273                        if axis >= dim {
1274                            return Err(format!(
1275                                "periodic axis {axis} out of range for {dim}D smooth"
1276                            ));
1277                        }
1278                        axes[axis] = true;
1279                    }
1280                }
1281            }
1282        }
1283    }
1284    if let Some(raw) = options.get("boundary").or_else(|| options.get("bc")) {
1285        let boundary = parse_option_list(raw);
1286        if boundary.len() == dim {
1287            for (axis, value) in boundary.iter().enumerate() {
1288                if matches!(value.as_str(), "periodic" | "cyclic" | "cc") {
1289                    axes[axis] = true;
1290                }
1291            }
1292        }
1293    }
1294    // A per-margin basis vector (`bs=c('cc','ps')` / `type=[...]`) declares each
1295    // margin's basis family, and a cyclic family (`cc`/`cp`/`cyclic`) makes THAT
1296    // margin periodic — exactly as the 1-D `s(x, bs='cc')` smooth wraps its lone
1297    // axis. Without this, the per-margin `cc` token was validated but discarded:
1298    // every `bs=c(...)` spelling collapsed to the same open B-spline tensor
1299    // (#1752). Only honor the vector form here; a scalar `bs='cc'` on a tensor is
1300    // ambiguous about which margins wrap, so it does not flip any axis on.
1301    if let Some(raw) = options.get("bs").or_else(|| options.get("type"))
1302        && bs_selector_is_vector(raw)
1303    {
1304        let per_margin = parse_option_list(raw);
1305        if per_margin.len() == dim {
1306            for (axis, margin_bs) in per_margin.iter().enumerate() {
1307                if matches!(
1308                    canonicalize_smooth_type(margin_bs),
1309                    "cc" | "cp" | "cyclic"
1310                ) {
1311                    axes[axis] = true;
1312                }
1313            }
1314        }
1315    }
1316    Ok(axes)
1317}
1318
1319/// Validate the per-margin `boundary=`/`bc=` tokens on a tensor-product smooth.
1320///
1321/// The tensor `boundary`/`bc` list selects, per margin, whether the margin
1322/// *wraps* (a `periodic`/`cyclic`/`cc` token, consumed by
1323/// [`parse_tensor_periodic_axes`]) or is an ordinary non-periodic margin. In the
1324/// tensor DSL a *non-periodic* margin is spelled `clamped` — in the B-spline
1325/// sense of a **clamped knot vector**, i.e. the standard open spline that is
1326/// free at its two ends and does not wrap (exactly how the callers document it:
1327/// "non-periodic / clamped … free at the two ends, no wrap"). It is therefore an
1328/// inert marker here, not a zero-derivative endpoint reparameterization: a
1329/// cylinder `te(theta, z, boundary=['periodic','clamped'], …)` is a cyclic θ
1330/// margin tensor-producted with an ordinary open z margin, the direct analog of
1331/// mgcv `te(bs=c("cc","ps"))` / `te(bs=c("cc","cr"))`.
1332///
1333/// The periodic selectors and the inert non-periodic markers
1334/// (`clamped`/`open`/`natural`/`free`/`none`/empty) are accepted; anything else
1335/// (e.g. a genuine `anchored` zero-value endpoint constraint, which has no
1336/// ordinary-margin meaning in a tensor) is surfaced as a clean
1337/// unsupported-feature error rather than silently dropped. Previously `clamped`
1338/// itself was rejected, so the cylinder/torus mixed-boundary tensors — the exact
1339/// construction the manifold quality suite builds — could not be fit at all.
1340fn validate_tensor_boundary_tokens(
1341    options: &BTreeMap<String, String>,
1342    dim: usize,
1343) -> Result<(), String> {
1344    let Some(raw) = options.get("boundary").or_else(|| options.get("bc")) else {
1345        return Ok(());
1346    };
1347    let entries = parse_option_list(raw);
1348    for (axis, value) in entries.iter().enumerate() {
1349        let inert = matches!(
1350            value.trim().to_ascii_lowercase().as_str(),
1351            "clamped" | "open" | "natural" | "free" | "none" | "" | "periodic" | "cyclic" | "cc"
1352        );
1353        if !inert {
1354            return Err(TermBuilderError::unsupported_feature(format!(
1355                "tensor smooth margin {axis} boundary token '{value}' is not supported \
1356                 (got bc/boundary={raw:?} on a {dim}-D tensor); tensor margins accept the periodic \
1357                 selectors (periodic/cyclic/cc) or the non-periodic markers (clamped/open/natural/free). \
1358                 Apply anchored/zero-value endpoint constraints with a 1-D s(x, bc=...) term instead."
1359            ))
1360            .to_string());
1361        }
1362    }
1363    Ok(())
1364}
1365
1366fn tensor_k_axis_option_axis(
1367    key: &str,
1368    cols: &[usize],
1369    ds: &Dataset,
1370) -> Result<Option<usize>, String> {
1371    let Some(suffix) = key.strip_prefix("k_") else {
1372        return Ok(None);
1373    };
1374    if suffix.is_empty() {
1375        return Err("tensor k axis option must be named k_<axis> or k_<variable>".to_string());
1376    }
1377    if let Ok(axis) = suffix.parse::<usize>() {
1378        return if axis < cols.len() {
1379            Ok(Some(axis))
1380        } else {
1381            Err(format!(
1382                "tensor k axis option `{key}` references axis {axis}, but the smooth has {} margins",
1383                cols.len()
1384            ))
1385        };
1386    }
1387
1388    let mut matches = cols
1389        .iter()
1390        .enumerate()
1391        .filter(|(_, col)| ds.headers.get(**col).is_some_and(|name| name == suffix))
1392        .map(|(axis, _)| axis);
1393    let first = matches.next();
1394    if matches.next().is_some() {
1395        return Err(format!(
1396            "tensor k axis option `{key}` matches more than one margin named `{suffix}`"
1397        ));
1398    }
1399    first.map(Some).ok_or_else(|| {
1400        let margin_names = cols
1401            .iter()
1402            .enumerate()
1403            .map(|(axis, col)| {
1404                let name = ds
1405                    .headers
1406                    .get(*col)
1407                    .map(String::as_str)
1408                    .unwrap_or("<unnamed>");
1409                format!("{axis}:{name}")
1410            })
1411            .collect::<Vec<_>>()
1412            .join(", ");
1413        format!(
1414            "tensor k axis option `{key}` does not match a margin index or name; tensor margins are [{margin_names}]"
1415        )
1416    })
1417}
1418
/// True when `key` is spelled like a per-axis tensor `k` alias: the `k_` prefix
/// followed by at least one more character. This is a purely syntactic test;
/// it does not check that the suffix names an actual margin.
fn is_tensor_k_axis_option_key(key: &str) -> bool {
    matches!(key.strip_prefix("k_"), Some(suffix) if !suffix.is_empty())
}
1423
1424/// Parse a per-margin basis dimension list (`k=<scalar>`, `k=[k0, k1, ...]`,
1425/// or axis aliases like `k_x=...` / `k_0=...`). A scalar is broadcast across
1426/// all axes; `None` returns the heuristic from the data column.
1427fn parse_tensor_k_list(
1428    options: &BTreeMap<String, String>,
1429    cols: &[usize],
1430    ds: &Dataset,
1431) -> Result<(Vec<usize>, bool), String> {
1432    let mut axis_values = vec![None; cols.len()];
1433    let mut saw_axis_alias = false;
1434    for (key, value) in options {
1435        let Some(axis) = tensor_k_axis_option_axis(key, cols, ds)? else {
1436            continue;
1437        };
1438        saw_axis_alias = true;
1439        if axis_values[axis].is_some() {
1440            return Err(format!("tensor k axis {axis} is specified more than once"));
1441        }
1442        let k: usize = value
1443            .parse()
1444            .map_err(|err| format!("invalid tensor k option `{key}={value}`: {err}"))?;
1445        axis_values[axis] = Some(k);
1446    }
1447
1448    let raw = options
1449        .get("k")
1450        .or_else(|| options.get("basis_dim"))
1451        .or_else(|| options.get("basis-dim"))
1452        .or_else(|| options.get("basisdim"));
1453    if saw_axis_alias {
1454        if raw.is_some() {
1455            return Err(
1456                "tensor k axis aliases cannot be combined with k= or basis_dim=".to_string(),
1457            );
1458        }
1459        if let Some(missing_axis) = axis_values.iter().position(Option::is_none) {
1460            let margin_name = cols
1461                .get(missing_axis)
1462                .and_then(|col| ds.headers.get(*col))
1463                .map(String::as_str)
1464                .unwrap_or("<unnamed>");
1465            return Err(format!(
1466                "tensor k axis aliases must specify every margin; missing axis {missing_axis} ({margin_name})"
1467            ));
1468        }
1469        return Ok((
1470            axis_values
1471                .into_iter()
1472                .map(|k| k.expect("missing axis values rejected above"))
1473                .collect(),
1474            false,
1475        ));
1476    }
1477    let Some(raw) = raw else {
1478        let inferred = heuristic_tensor_margin_knots(cols, ds);
1479        return Ok((inferred, true));
1480    };
1481    let entries = split_list_option(raw);
1482    if entries.len() == 1 {
1483        let k: usize = entries[0]
1484            .parse()
1485            .map_err(|err| format!("invalid tensor k '{}': {err}", entries[0]))?;
1486        return Ok((vec![k; cols.len()], false));
1487    }
1488    if entries.len() != cols.len() {
1489        return Err(format!(
1490            "tensor k list length {} must match smooth dimension {}",
1491            entries.len(),
1492            cols.len()
1493        ));
1494    }
1495    let mut out = Vec::with_capacity(entries.len());
1496    for entry in entries {
1497        let k: usize = entry
1498            .parse()
1499            .map_err(|err| format!("invalid tensor k '{entry}': {err}"))?;
1500        out.push(k);
1501    }
1502    Ok((out, false))
1503}
1504
1505/// Parse the `identifiability=` option for tensor-product smooths. Mirrors the
1506/// vocabulary of the Matern/Duchon parsers so the formula DSL is consistent.
1507///
1508/// `kind` selects the default identifiability when no explicit
1509/// `identifiability=` option is supplied: `te(...)` ([`SmoothKind::Te`]) keeps
1510/// the full-tensor sum-to-zero default, while `ti(...)` ([`SmoothKind::Ti`])
1511/// defaults to per-margin sum-to-zero so the marginal main effects are excluded
1512/// (the mgcv tensor-interaction semantics). An explicit option always wins.
1513fn parse_tensor_identifiability(
1514    options: &BTreeMap<String, String>,
1515    kind: SmoothKind,
1516) -> Result<TensorBSplineIdentifiability, String> {
1517    let Some(raw) = options.get("identifiability").map(String::as_str) else {
1518        return Ok(match kind {
1519            SmoothKind::Ti => TensorBSplineIdentifiability::MarginalSumToZero,
1520            _ => TensorBSplineIdentifiability::default(),
1521        });
1522    };
1523    match raw.trim().to_ascii_lowercase().as_str() {
1524        "none" => Ok(TensorBSplineIdentifiability::None),
1525        "sum_tozero" | "sum-to-zero" | "center_sum_tozero" | "center-sum-to-zero" | "centered"
1526        | "sumtozero" => Ok(TensorBSplineIdentifiability::SumToZero),
1527        "marginal_sum_tozero" | "marginal-sum-to-zero" | "marginal_sumtozero"
1528        | "marginalsumtozero" | "interaction" => {
1529            Ok(TensorBSplineIdentifiability::MarginalSumToZero)
1530        }
1531        other => Err(TermBuilderError::unsupported_feature(format!(
1532            "invalid tensor identifiability '{other}'; expected one of: none, sum_tozero, marginal_sum_tozero"
1533        ))
1534        .to_string()),
1535    }
1536}
1537
1538fn bspline_boundary_declares_periodic_axis(options: &BTreeMap<String, String>) -> bool {
1539    options
1540        .get("boundary")
1541        .or_else(|| options.get("bc"))
1542        .map(|raw| {
1543            parse_option_list(raw)
1544                .into_iter()
1545                .any(|value| matches!(value.as_str(), "periodic" | "cyclic" | "cc"))
1546        })
1547        .unwrap_or(false)
1548}
1549
/// Collapse a `bs=`/`type=` smooth selector onto its engine-internal canonical
/// name.
///
/// User-facing names — including mgcv-compatible spellings whose semantics
/// match an existing gamfit smooth exactly — are mapped to the canonical names
/// the dispatch in [`build_smooth_basis`] switches on. Registering another
/// exactly-equivalent alias is a one-line addition to the table below; the
/// dispatch itself stays the single place where types are interpreted.
///
/// Every alias MUST be a true semantic equivalent of its target, never an
/// approximation. mgcv names that differ in meaning from every gamfit smooth
/// (e.g. `bs="ts"` shrinkage thin-plate, `bs="ad"` adaptive) are deliberately
/// left out so they reach the unsupported-type path and produce a real
/// diagnostic rather than a silent substitution. mgcv's `bs="cr"`/`"cs"` (cubic
/// regression and its shrinkage twin) are also not aliased here: the dispatch
/// in [`build_smooth_basis`] handles them directly, because the `cr`/`cs`
/// distinction controls a default (`double_penalty`) this layer cannot see.
///
/// The comparison is exact (case-sensitive, no trimming). Anything not in the
/// table is returned unchanged, so genuine typos still surface through the
/// dispatch's usual "unsupported smooth type" error.
pub(crate) fn canonicalize_smooth_type(raw: &str) -> &str {
    const ALIASES: &[(&str, &str)] = &[
        // Thin-plate spline: mgcv `bs="tp"` is the default thin-plate
        // regression spline, the exact equivalent of gamfit's `"tps"`.
        ("tp", "tps"),
        // Gaussian process / Matérn: mgcv `bs="gp"` defaults to a Matérn
        // covariance kernel with REML smoothing-parameter selection, matching
        // gamfit's `"matern"` exactly (same kernel-Gram identity, same REML
        // route).
        ("gp", "matern"),
        // Constant-curvature (M_κ) geodesic-kernel smooth (#944): every
        // spelling lands on one canonical type so `bs="curv"`/`bs="mkappa"`
        // cannot diverge from `curv(...)`.
        ("curv", "curvature"),
        ("constant_curvature", "curvature"),
        ("mkappa", "curvature"),
        // Measure-jet spline (multiscale local-jet-residual energy of the
        // empirical measure). mgcv has no measure-learned geometry smooth, so
        // there is no mgcv alias.
        ("mjs", "measurejet"),
        ("measure_jet", "measurejet"),
        ("web", "measurejet"),
    ];

    for &(alias, canonical) in ALIASES {
        if alias == raw {
            return canonical;
        }
    }
    raw
}
1592
1593/// Is `margin_bs` a per-margin basis name that the tensor builder realizes as a
1594/// penalized 1-D B-spline margin?
1595///
1596/// gam's tensor product is built from penalized B-spline marginals. mgcv's
1597/// thin-plate (`tp`/`tps`), P-spline (`ps`), B-spline (`bs`), cubic-regression
1598/// (`cr`/`cs`), and cyclic (`cc`/`cp`/`cyclic`) marginals are all penalized
1599/// splines spanning the same per-axis smoothing space, so a B-spline margin
1600/// reproduces the same tensor smoothing class. Margin kinds with fundamentally
1601/// different structure (adaptive, random-effect, sphere) are NOT accepted as
1602/// tensor margins.
1603pub(crate) fn tensor_margin_bs_is_supported(margin_bs: &str) -> bool {
1604    matches!(
1605        canonicalize_smooth_type(margin_bs),
1606        "tps" | "ps" | "bs" | "bspline" | "cr" | "cs" | "cc" | "cp" | "cyclic"
1607    )
1608}
1609
/// Do the smooth's options ask for a periodic/cyclic axis?
///
/// Factored out so the type resolver and `build_smooth_basis` share one notion
/// of "periodic requested", mirroring the boundary-condition reading of the
/// periodic-aware dispatch branches. The answer is `true` when
///
/// * a `periodic` or `cyclic` option key is present (whatever its value), or
/// * the `boundary` option — or `bc`, consulted only when `boundary` is
///   absent — contains `periodic` or `cyclic` as a case-insensitive substring.
///
/// NOTE(review): a bare `cc` boundary token is not recognized here, although
/// the tensor boundary parsers in this file treat `cc` as a periodic selector
/// — confirm whether that difference is intended.
pub(crate) fn smooth_options_declare_periodic(options: &BTreeMap<String, String>) -> bool {
    if options.contains_key("periodic") || options.contains_key("cyclic") {
        return true;
    }
    let Some(boundary) = options.get("boundary").or_else(|| options.get("bc")) else {
        return false;
    };
    let lowered = boundary.to_ascii_lowercase();
    lowered.contains("periodic") || lowered.contains("cyclic")
}
1627
1628/// Resolve the canonical engine-internal smooth-type name for a term.
1629///
1630/// Reads the user-facing `type=`/`bs=` selector and collapses mgcv-compatible
1631/// aliases (`tp`→`tps`, `gp`→`matern`) via [`canonicalize_smooth_type`], or
1632/// derives the default from the smooth kind/arity when no selector is given.
1633/// This is the single source of truth for the dispatch in
1634/// [`build_smooth_basis`]; other call sites (e.g. predictor-specific basis
1635/// policy) use it so the classification never drifts from the dispatch.
1636/// Is the raw `bs=`/`type=` selector a vector literal (`c('tp','tp')`,
1637/// `['tp','tp']`, `(tp, tp)`) rather than a scalar smooth-type name?
1638///
1639/// mgcv's tensor smooths take a *per-margin* basis vector
1640/// (`te(x1, x2, bs=c('tp','tp'))`). Such a value is not a scalar canonical
1641/// type and must not be fed through [`canonicalize_smooth_type`] — it has to be
1642/// recognized as a tensor request and split into per-margin types. A scalar
1643/// selector (`bs="tp"`) is left untouched.
1644pub(crate) fn bs_selector_is_vector(raw: &str) -> bool {
1645    let trimmed = raw.trim();
1646    let bracketed = (trimmed.starts_with('[') && trimmed.ends_with(']'))
1647        || (trimmed.starts_with("c(") || trimmed.starts_with("C(")) && trimmed.ends_with(')')
1648        || (trimmed.starts_with('(') && trimmed.ends_with(')'));
1649    bracketed && !parse_option_list(trimmed).is_empty()
1650}
1651
1652pub fn resolve_smooth_type_name(
1653    kind: SmoothKind,
1654    n_cols: usize,
1655    options: &BTreeMap<String, String>,
1656) -> String {
1657    let selector = options.get("type").or_else(|| options.get("bs"));
1658    // A per-margin basis vector is a tensor request, never a scalar type. Route
1659    // it to the tensor builder, which reads the per-margin types out of the
1660    // same `bs=` option. (A vector on a non-tensor smooth is ill-formed and
1661    // falls through to the scalar path below so the existing diagnostic fires.)
1662    if let Some(raw) = selector
1663        && bs_selector_is_vector(raw)
1664        && matches!(kind, SmoothKind::Te | SmoothKind::Ti | SmoothKind::T2)
1665    {
1666        return "tensor".to_string();
1667    }
1668    selector
1669        .map(|s| canonicalize_smooth_type(&s.to_ascii_lowercase()).to_string())
1670        .unwrap_or_else(|| match kind {
1671            SmoothKind::Te | SmoothKind::Ti | SmoothKind::T2 => "tensor".to_string(),
1672            SmoothKind::S if n_cols == 1 => "bspline".to_string(),
1673            // Mixed periodic Euclidean radial kernels are not separable on the
1674            // cylinder. Use a tensor product with a cyclic margin so s(theta,h)
1675            // honors seam continuity while preserving the formula-level s(...).
1676            SmoothKind::S if smooth_options_declare_periodic(options) => "tensor".to_string(),
1677            SmoothKind::S => "tps".to_string(),
1678        })
1679}
1680
/// Is the basis of this canonical smooth type sized by the generous spatial
/// center heuristic ([`crate::basis::default_num_centers`])?
///
/// Only the radial spatial bases — thin-plate (`tps`), Matérn/GP (`matern`),
/// and Duchon (`duchon`) — route their default basis dimension through
/// `plan_spatial_basis(.., Default, ..)`. The B-spline, cyclic, tensor, and
/// factor-smooth bases use their own modest knot-based defaults, so they are
/// unaffected by — and must not be perturbed by — secondary-predictor
/// basis-parsimony adjustments (#501).
///
/// `canonical_type` is compared exactly; pass an already canonicalized name.
pub fn smooth_type_uses_spatial_center_heuristic(canonical_type: &str) -> bool {
    const RADIAL_SPATIAL_TYPES: [&str; 3] = ["tps", "matern", "duchon"];
    RADIAL_SPATIAL_TYPES.contains(&canonical_type)
}
1692
1693pub fn build_smooth_basis(
1694    kind: SmoothKind,
1695    vars: &[String],
1696    cols: &[usize],
1697    options: &BTreeMap<String, String>,
1698    ds: &Dataset,
1699    inference_notes: &mut Vec<String>,
1700    policy: &ResourcePolicy,
1701    smooth_coordinate_count: usize,
1702) -> Result<SmoothBasisSpec, String> {
1703    // Fail fast on degenerate input: a smooth whose (non-categorical) coordinate
1704    // columns collapse to a SINGLE distinct point can only ever fit the response
1705    // mean — its design matrix is rank-1. For a UNIVARIATE smooth this is exactly
1706    // "the one column is constant": `smooth(x)`/`matern(x)` on constant `x` would
1707    // otherwise silently fit the mean of `y` with no visible cue (Duchon already
1708    // errors loudly via the basis layer; this makes the diagnosis explicit and
1709    // uniform). For a MULTIVARIATE smooth (tensor, sphere, tps, ...) a single
1710    // constant coordinate is NOT degenerate — the basis still varies along the
1711    // other coordinate(s) and the penalty absorbs the rank-deficient direction
1712    // (e.g. a constant-longitude meridian arc on the sphere is a well-posed 1-D
1713    // slice of S²). Such a term is degenerate only when EVERY coordinate is
1714    // constant at once, i.e. the joint input is a single point. Test the JOINT
1715    // cardinality, not each column independently, so the loud diagnosis still
1716    // fires for the genuinely rank-1 case without rejecting well-posed
1717    // lower-dimensional slices.
1718    let coord_cols: Vec<(&String, usize)> = vars
1719        .iter()
1720        .zip(cols.iter().copied())
1721        .filter(|(_, col)| !matches!(ds.column_kinds.get(*col), Some(ColumnKindTag::Categorical)))
1722        .collect();
1723    if !coord_cols.is_empty() {
1724        let views: Vec<ArrayView1<'_, f64>> = coord_cols
1725            .iter()
1726            .map(|(_, col)| ds.values.column(*col))
1727            .collect();
1728        let n_rows = views[0].len();
1729        let mut distinct_points = std::collections::HashSet::<Vec<u64>>::new();
1730        for r in 0..n_rows {
1731            let key: Vec<u64> = views
1732                .iter()
1733                .map(|v| {
1734                    let x = v[r];
1735                    let norm = if x == 0.0 { 0.0 } else { x };
1736                    norm.to_bits()
1737                })
1738                .collect();
1739            distinct_points.insert(key);
1740            if distinct_points.len() > 1 {
1741                break;
1742            }
1743        }
1744        if distinct_points.len() <= 1 {
1745            return Err(TermBuilderError::degenerate_data(if coord_cols.len() == 1 {
1746                let var = coord_cols[0].0;
1747                format!(
1748                    "smooth term over '{var}' has only one unique value in the training data \
1749                     — a smooth on a constant column is degenerate and would only fit the response mean. \
1750                     Remove `{var}` from the smooth, drop the term, or check the data."
1751                )
1752            } else {
1753                let names = coord_cols
1754                    .iter()
1755                    .map(|(v, _)| v.as_str())
1756                    .collect::<Vec<_>>()
1757                    .join(", ");
1758                format!(
1759                    "smooth term over ({names}) has only one unique joint coordinate in the training \
1760                     data — every coordinate is constant, so the smooth is degenerate and would only \
1761                     fit the response mean. Drop the term or check the data."
1762                )
1763            })
1764            .to_string());
1765        }
1766    }
1767    if let Some(by_name) = options.get("by").cloned() {
1768        let by_col = options
1769            .get("__by_col")
1770            .and_then(|raw| raw.parse::<usize>().ok())
1771            .or_else(|| vars.iter().position(|v| v == &by_name).map(|idx| cols[idx]))
1772            .ok_or_else(|| format!("unknown by= column '{by_name}'"))?;
1773        let mut inner_options = options.clone();
1774        inner_options.remove("by");
1775        inner_options.remove("__by_col");
1776        inner_options.remove("id");
1777        let inner = build_smooth_basis(
1778            kind,
1779            vars,
1780            cols,
1781            &inner_options,
1782            ds,
1783            inference_notes,
1784            policy,
1785            smooth_coordinate_count,
1786        )?;
1787        let by_kind = match ds.column_kinds.get(by_col).copied() {
1788            Some(ColumnKindTag::Categorical) => ByVarKind::Factor {
1789                feature_col: by_col,
1790                ordered: option_bool(options, "ordered").unwrap_or(false),
1791                frozen_levels: None,
1792            },
1793            Some(ColumnKindTag::Continuous | ColumnKindTag::Binary) => ByVarKind::Numeric {
1794                feature_col: by_col,
1795            },
1796            None => {
1797                return Err(format!(
1798                    "internal column-kind lookup failed for by='{by_name}'"
1799                ));
1800            }
1801        };
1802        return Ok(SmoothBasisSpec::BySmooth {
1803            smooth: Box::new(inner),
1804            by_kind,
1805        });
1806    }
1807
1808    let smooth_double_penalty = option_bool(options, "double_penalty").unwrap_or(true);
1809    let type_opt = resolve_smooth_type_name(kind, cols.len(), options);
1810
1811    if matches!(type_opt.as_str(), "fs" | "sz" | "re") {
1812        validate_known_options(
1813            type_opt.as_str(),
1814            options,
1815            &[
1816                "type",
1817                "bs",
1818                "k",
1819                "basis_dim",
1820                "basis-dim",
1821                "basisdim",
1822                "knots",
1823                "knot_placement",
1824                "knot-placement",
1825                "knotplacement",
1826                "degree",
1827                "penalty_order",
1828                "m",
1829                "double_penalty",
1830                "ordered",
1831            ],
1832        )?;
1833        if cols.len() != 2 {
1834            return Err(format!(
1835                "{} factor-smooth currently expects exactly two variables (one numeric, one categorical)",
1836                type_opt
1837            ));
1838        }
1839        let kinds = cols
1840            .iter()
1841            .map(|&c| ds.column_kinds.get(c).copied())
1842            .collect::<Vec<_>>();
1843        let (cont_idx, group_idx) = if type_opt == "re" {
1844            // mgcv random-slope examples are often s(g, x, bs="re").
1845            match (kinds[0], kinds[1]) {
1846                (Some(ColumnKindTag::Categorical), _) => (1usize, 0usize),
1847                (_, Some(ColumnKindTag::Categorical)) => (0usize, 1usize),
1848                _ => (1usize, 0usize),
1849            }
1850        } else {
1851            match (kinds[0], kinds[1]) {
1852                (_, Some(ColumnKindTag::Categorical)) => (0usize, 1usize),
1853                (Some(ColumnKindTag::Categorical), _) => (1usize, 0usize),
1854                _ => {
1855                    return Err(format!(
1856                        "{} factor-smooth requires one categorical factor variable",
1857                        type_opt
1858                    ));
1859                }
1860            }
1861        };
1862        let c = cols[cont_idx];
1863        let (minv, maxv) = col_minmax(ds.values.column(c))?;
1864        let degree = if type_opt == "re" {
1865            1
1866        } else {
1867            option_usize(options, "degree").unwrap_or(DEFAULT_BSPLINE_DEGREE)
1868        };
1869        // For a factor smooth every group's curve is fit from THAT group's rows
1870        // alone, so the marginal's flexibility must respect the least-resolved
1871        // group, not the pooled column. The pooled heuristic can hand the marginal
1872        // a basis that saturates (or exceeds) a small group's sample — e.g. the
1873        // sleepstudy panel has 8 training days per subject, and a default cubic
1874        // basis of 8 functions interpolates each subject's 8 points, leaving no
1875        // room for the wiggliness penalty to collapse the curve toward the
1876        // per-subject line. The factor smooth then fits within-group noise and
1877        // extrapolates badly (held-out forecast worse than the population mean).
1878        //
1879        // Cap the marginal basis below the minimum per-group covariate resolution
1880        // so the penalty always retains residual degrees of freedom to shrink each
1881        // group's curvature toward its linear null space (the random-slope
1882        // estimand). This small-group cap composes with a separate upper bound at
1883        // mgcv's factor-smooth default k=10 (FACTOR_SMOOTH_DEFAULT_BASIS_DIM,
1884        // applied below), so even ample-data groups get the modest SHARED marginal
1885        // a factor smooth wants rather than the full pooled basis. The explicit
1886        // `re` random-effect form takes neither cap: it is a raw linear `[1, x]`
1887        // random effect (0 internal knots), handled in the branch above.
1888        let pooled_internal = heuristic_knots_for_column(ds.values.column(c));
1889        let default_internal = if type_opt == "re" {
1890            // `bs="re"` is a PARAMETRIC random effect, not a smooth of the
1891            // covariate: `s(x, g, bs="re")` is the mgcv random intercept+slope
1892            // `(1 + x | g)`, i.e. a per-group line `[1, x]`, penalized by an iid
1893            // ridge. A degree-1 marginal with ZERO internal knots spans exactly
1894            // that linear space (2 coefficients per group). Using the pooled
1895            // knot heuristic here instead turned the marginal into a
1896            // piecewise-linear B-spline (e.g. 6 functions/group on sleepstudy),
1897            // i.e. a *smooth* with kinks rather than a random slope — many extra
1898            // collinear-across-levels coefficients that ill-condition the joint
1899            // Newton/REML solve (minutes-long fits, and a singular block when
1900            // combined with a separate random intercept `s(g, bs="re")`). The
1901            // raw linear basis is both the correct `re` semantics and fast.
1902            0
1903        } else {
1904            let min_group_resolution =
1905                min_per_group_unique_count(ds.values.column(c), ds.values.column(cols[group_idx]));
1906            // Per-group basis dim = degree + 1 + internal. Hold it well below the
1907            // smallest group's resolution (leave at least two residual points per
1908            // group) so the smooth cannot interpolate that group and the
1909            // wiggliness penalty retains the room to collapse each curve toward
1910            // its linear null space. Never drop below `degree + 2`, which keeps
1911            // exactly the linear span plus a single curvature direction — the
1912            // minimal smoother that can still bend if the data demand it.
1913            let basis_cap = min_group_resolution.saturating_sub(2).max(degree + 2);
1914            let internal_cap = basis_cap.saturating_sub(degree + 1);
1915            let capped = pooled_internal.min(internal_cap.max(1));
1916            // A factor smooth (`fs` AND `sz`) shares ONE marginal across ALL
1917            // levels, each level's curve fit from that group's rows alone. The
1918            // pooled knot heuristic (driven by the full column's sample) hands it
1919            // a much richer basis than the shared signal needs — ~24
1920            // functions/group on the gam#903 factor-smooth-recovery fixtures — so
1921            // REML has the capacity to fit within-group noise and over-fits the
1922            // shared shape (fs: edf 58 vs mgcv's k=10/edf 39; sz: gam 0.068 vs
1923            // mgcv 0.046 truth RMSE), losing the truth-recovery head-to-head with
1924            // the mature tool. mgcv's factor-smooth default `k=10` embodies the
1925            // right convention: a modest shared marginal. Cap the marginal there
1926            // (basis ≈ degree+1+internal ≈ 10) for both flavours when the
1927            // small-group cap above is not already tighter, so REML is not handed
1928            // noise-fitting capacity it does not need. An explicit `k`/`basis_dim`
1929            // overrides this (parse_ps_internal_knots); `re` is the raw linear
1930            // effect handled above.
1931            let fs_default_internal = FACTOR_SMOOTH_DEFAULT_BASIS_DIM
1932                .saturating_sub(degree + 1)
1933                .max(1);
1934            capped.min(fs_default_internal)
1935        };
1936        let (n_knots, _, effective_degree) =
1937            parse_ps_internal_knots(options, degree, default_internal)?;
1938        let penalty_order = option_usize(options, "penalty_order")
1939            .unwrap_or(if effective_degree > 1 { 2 } else { 1 })
1940            .min(effective_degree);
1941        // All factor-smooth flavours (`fs`, `sz`, `re`) place their per-level
1942        // marginal on the SAME penalized B-spline (P-spline) basis. The flavours
1943        // differ ONLY in their penalty/constraint structure (handled below) —
1944        // sz: zero-sum deviation blocks with the per-level null space left
1945        // unpenalized; fs: random-effect double penalty; re: identity ridge.
1946        //
1947        // `sz` USED to route its default-degree marginal to a NATURAL cubic
1948        // regression spline (`cr`), on the belief that mgcv's `bs="sz"` does the
1949        // same and that cr recovers smooth signals more efficiently than the
1950        // (then uncapped) B-spline margin (#1074). That introduced a consistency
1951        // failure (#1605): the `cr` basis enforces the natural boundary
1952        // conditions f''(x_1)=f''(x_k)=0 and extrapolates linearly past the end
1953        // knots, so it CANNOT represent a per-group deviation curve with non-zero
1954        // curvature at the data boundary. Phase-shifted deviation shapes
1955        // (f''(0) = -(2π)² sin(φ) ≠ 0) are then biased toward "free linear +
1956        // anchored wiggle", under-shooting the amplitude — a bias that does NOT
1957        // vanish as n→∞ (n-independent: a genuine consistency failure, not
1958        // finite-sample shrinkage). The earlier #700/#1074 sz fixtures used
1959        // d_g ∝ sin(2πx), whose f'' happens to vanish at x=0 and x=1, so they
1960        // accidentally satisfied the natural BC and never exposed the gap; the
1961        // `fs` sibling, on this very B-spline marginal, recovers the SAME
1962        // phase-shifted data to the noise floor.
1963        //
1964        // The penalized B-spline marginal makes no boundary assumption, so it
1965        // represents arbitrary deviation shapes, and — with the
1966        // FACTOR_SMOOTH_DEFAULT_BASIS_DIM cap above already removing the
1967        // noise-fitting capacity that originally motivated leaving B-splines —
1968        // it recovers the BC-satisfying #700/#1074 signals just as well. Sharing
1969        // one marginal basis across all flavours also lets the B-spline degree/
1970        // knot degradation handle low-cardinality covariates uniformly (what
1971        // `fs` already does), so the `sz`-only cr data-support cap (#1541/#1542)
1972        // — and the asymmetry where only the cr-marginal `sz` spelling hard-
1973        // failed a 3-level ordinal — is no longer needed.
1974        let marginal_knotspec = resolve_nonperiodic_bspline_knotspec(
1975            options,
1976            ds.values.column(c),
1977            (minv, maxv),
1978            effective_degree,
1979            n_knots,
1980        )?;
1981        let marginal = BSplineBasisSpec {
1982            degree: effective_degree,
1983            penalty_order,
1984            knotspec: marginal_knotspec,
1985            // mgcv's `bs="fs"` is a random-effect-style smooth: EVERY per-level
1986            // coefficient, including the marginal null space, is penalized so
1987            // unobserved groups can be predicted — so `fs` keeps the null-space
1988            // (double) penalty. mgcv's `bs="sz"` is a pure across-level
1989            // *deviation* smooth that, under the default `select=FALSE`, leaves
1990            // the per-level null space UNPENALIZED; carrying the double penalty
1991            // there shrinks the genuine deviation signal and over-smooths the
1992            // recovered curves relative to mgcv (gam#700). `re` carries its own
1993            // identity ridge below and ignores this flag. Honour an explicit
1994            // user `double_penalty=` either way.
1995            double_penalty: option_bool(options, "double_penalty")
1996                .unwrap_or(type_opt.as_str() != "sz"),
1997            identifiability: BSplineIdentifiability::None,
1998            boundary_conditions: Default::default(),
1999            boundary: OneDimensionalBoundary::Open,
2000        };
2001        let flavour = match type_opt.as_str() {
2002            "fs" => FactorSmoothFlavour::Fs {
2003                m_null_penalty_orders: vec![
2004                    option_usize(options, "m").unwrap_or(DEFAULT_PENALTY_ORDER),
2005                ],
2006            },
2007            "sz" => FactorSmoothFlavour::Sz,
2008            "re" => FactorSmoothFlavour::Re,
2009            // Outer `matches!` already restricts to fs/sz/re.
2010            other => {
2011                return Err(format!(
2012                    "internal: factor-smooth flavour dispatch reached unexpected type `{}`",
2013                    other
2014                ));
2015            }
2016        };
2017        return Ok(SmoothBasisSpec::FactorSmooth {
2018            spec: FactorSmoothSpec {
2019                continuous_cols: vec![c],
2020                group_col: cols[group_idx],
2021                marginal,
2022                flavour,
2023                group_frozen_levels: None,
2024                frozen_global_orthogonality: None,
2025            },
2026        });
2027    }
2028
2029    match type_opt.as_str() {
2030        "cyclic" | "cc" | "cp" | "cyclic-ps" => {
2031            validate_known_options(
2032                "cyclic",
2033                options,
2034                &[
2035                    "type",
2036                    "bs",
2037                    "by",
2038                    "k",
2039                    "basis_dim",
2040                    "basis-dim",
2041                    "basisdim",
2042                    "degree",
2043                    "penalty_order",
2044                    "period",
2045                    "periods",
2046                    "period_start",
2047                    "period_end",
2048                    "start",
2049                    "end",
2050                    "origin",
2051                    "origins",
2052                    "period_origin",
2053                    "period-origin",
2054                    "domain_origin",
2055                    "double_penalty",
2056                    "id",
2057                    "__by_col",
2058                    "identifiability",
2059                ],
2060            )?;
2061            if cols.len() != 1 {
2062                return Err(format!(
2063                    "periodic smooth expects one variable, got {}",
2064                    cols.len()
2065                ));
2066            }
2067            let c = cols[0];
2068            let (minv, maxv) = col_minmax(ds.values.column(c))?;
2069            let degree = option_usize(options, "degree").unwrap_or(DEFAULT_BSPLINE_DEGREE);
2070            let mut default_internal = heuristic_knots_for_column(ds.values.column(c));
2071            if ds.values.nrows() <= 32 && smooth_coordinate_count >= 5 {
2072                default_internal = default_internal.min(1);
2073            }
2074            // A periodic cubic spline has no free endpoint behaviour to spend
2075            // degrees of freedom on: the wrap constraint removes the ordinary
2076            // boundary wiggle, and the cyclic second-difference penalty leaves
2077            // only the constant direction (handled by the smooth
2078            // identifiability constraint).  An over-rich default would give
2079            // small binomial/continuation-ratio fits a large penalized nuisance
2080            // space whose REML/LAML optimum is driven by finite-sample Bernoulli
2081            // noise rather than the low-frequency periodic signal.  Cap the
2082            // cyclic default in the mgcv `bs="cc"` spirit: a modest basis unless
2083            // the caller explicitly requests `k=...`; high-frequency periodic
2084            // structure remains available through that explicit contract.  Since
2085            // gam#1680 lowered the open-spline univariate default to ≈12
2086            // functions this cap and the open-spline default coincide, so it now
2087            // acts as an explicit ceiling/guard that keeps the cyclic default lean
2088            // even if the open-spline heuristic is later widened.
2089            let cyclic_default_basis_cap = CYCLIC_DEFAULT_BASIS_DIM.max(degree + 1);
2090            let default_basis = (default_internal + degree + 1).min(cyclic_default_basis_cap);
2091            let num_basis = option_usize_any(options, &["k", "basis_dim", "basis-dim", "basisdim"])
2092                .unwrap_or(default_basis);
2093            if num_basis < degree + 1 {
2094                return Err(format!(
2095                    "periodic smooth: k={} too small for degree {}; expected k >= {}",
2096                    num_basis,
2097                    degree,
2098                    degree + 1
2099                ));
2100            }
2101            // The cyclic arm is periodic on its single axis by construction, so
2102            // resolve the period exactly the way the `s()`/`ps` arm does: honour
2103            // `period=`/`periods=` first (with `origin=` setting the domain
2104            // start), and fall back to the `period_start`/`period_end` endpoint
2105            // form only when `period=` is absent. Previously this arm jumped
2106            // straight to `parse_periodic_domain_1d`, so a `period=<v>`
2107            // declaration was silently dropped and the smooth wrapped at the
2108            // data range (#816). All three helpers route through
2109            // `parse_numeric_expr`, so `period=2*pi` and `period_end=2*pi` parse
2110            // identically (#815).
2111            let periodic_axes = [true];
2112            let periods = parse_periods(options, &periodic_axes)?;
2113            let origins = parse_period_origins(options, &periodic_axes)?;
2114            // Distinguish a *cyclic basis selector* (`bs='cc'`/`'cp'`/`'cyclic'`,
2115            // this whole arm) from a generic B-spline forced periodic by a
2116            // `periodic=`/`boundary=` flag (the `ps`/`bspline` arm). Only the
2117            // latter carries the sample-dependent off-by-ε seam that #1771's
2118            // guard in `parse_periodic_domain_1d` requires an explicit period
2119            // to avoid. A bare `s(x, bs='cc')` opts INTO mgcv's `bs="cc"`
2120            // semantics — the wrap IS the observed data range — exactly like
2121            // the tensor cc-margin fallback (`te(x, z, bs=c('cc','cc'))`). The
2122            // cyclic arm was left routing through the now-strict helper when
2123            // #1771 tightened it, so a bare cyclic smooth hard-errored with
2124            // "periodic B-spline smooth requires an explicit period" even
2125            // though its period is well-defined. Honor `period=`/`periods=`
2126            // first, then the half-open `period_start`/`period_end` endpoint
2127            // form, and only otherwise wrap at the observed `[min, max]` span.
2128            let has_endpoint_decl = ["period_start", "start", "period_end", "end"]
2129                .iter()
2130                .any(|key| options.contains_key(*key));
2131            let (domain_start, period) = if let Some(p) = periods[0] {
2132                (origins[0].unwrap_or(minv), p)
2133            } else if has_endpoint_decl {
2134                parse_periodic_domain_1d(options, minv, maxv)?
2135            } else {
2136                let span = maxv - minv;
2137                if !(span.is_finite() && span > 0.0) {
2138                    return Err(format!(
2139                        "cyclic smooth requires a positive observed data range to derive \
2140                         its period, got [{minv}, {maxv}]"
2141                    ));
2142                }
2143                (origins[0].unwrap_or(minv), span)
2144            };
2145            Ok(SmoothBasisSpec::BSpline1D {
2146                feature_col: c,
2147                spec: BSplineBasisSpec {
2148                    degree,
2149                    penalty_order: option_usize(options, "penalty_order")
2150                        .unwrap_or(DEFAULT_PENALTY_ORDER),
2151                    knotspec: BSplineKnotSpec::PeriodicUniform {
2152                        data_range: (domain_start, domain_start + period),
2153                        num_basis,
2154                    },
2155                    double_penalty: smooth_double_penalty,
2156                    identifiability: BSplineIdentifiability::default(),
2157                    boundary_conditions: Default::default(),
2158                    boundary: OneDimensionalBoundary::Cyclic {
2159                        start: domain_start,
2160                        end: domain_start + period,
2161                    },
2162                },
2163            })
2164        }
2165        "bspline" | "ps" | "p-spline" | "cr" | "cs" => {
2166            // mgcv's `bs="cr"` (cubic regression spline) and `bs="cs"` (its
2167            // shrinkage twin) are penalized cubic-regression smooths that span
2168            // the same per-axis function space as gamfit's `bspline` (cubic
2169            // B-spline, second-derivative penalty). Route both through the
2170            // 1-D B-spline arm; the only semantic difference is whether the
2171            // null space is shrunk: `cr` is the no-shrinkage form (mgcv's
2172            // default) and `cs` is the shrinkage form (mgcv's `cs`/gamfit's
2173            // double_penalty). Without this route, a stand-alone
2174            // `s(x, bs='cr')` (which is otherwise a routine 1-D smooth in
2175            // mgcv-compatible formulae) reached the dispatch's default arm
2176            // and aborted the whole fit with `unsupported smooth type 'cr'`,
2177            // even though the same name was already recognized as a tensor
2178            // margin (`tensor_margin_bs_is_supported`).
2179            let validation_name = match type_opt.as_str() {
2180                "cr" => "cr",
2181                "cs" => "cs",
2182                _ => "bspline",
2183            };
2184            validate_known_options(
2185                validation_name,
2186                options,
2187                &[
2188                    "type",
2189                    "bs",
2190                    "by",
2191                    "k",
2192                    "basis_dim",
2193                    "basis-dim",
2194                    "basisdim",
2195                    "knots",
2196                    "knot_placement",
2197                    "knot-placement",
2198                    "knotplacement",
2199                    "degree",
2200                    "penalty_order",
2201                    "boundary",
2202                    "bc",
2203                    "boundary_conditions",
2204                    "bc_left",
2205                    "bc_right",
2206                    "left_bc",
2207                    "right_bc",
2208                    "start_bc",
2209                    "end_bc",
2210                    "side",
2211                    "anchor",
2212                    "anchor_value",
2213                    "value",
2214                    "anchor_left",
2215                    "left_anchor",
2216                    "anchor_right",
2217                    "right_anchor",
2218                    "periodic",
2219                    "period",
2220                    "periods",
2221                    "period_start",
2222                    "period_end",
2223                    "origin",
2224                    "double_penalty",
2225                    "by",
2226                    "id",
2227                    "__by_col",
2228                    "identifiability",
2229                    "by",
2230                ],
2231            )?;
2232            if cols.len() != 1 {
2233                return Err(TermBuilderError::incompatible_config(format!(
2234                    "bspline smooth expects one variable, got {}",
2235                    cols.len()
2236                ))
2237                .to_string());
2238            }
2239            let c = cols[0];
2240            let (minv, maxv) = col_minmax(ds.values.column(c))?;
2241            let degree = option_usize(options, "degree").unwrap_or(DEFAULT_BSPLINE_DEGREE);
2242            let default_internal = heuristic_knots_for_column(ds.values.column(c));
2243            let (mut n_knots, inferred, effective_degree) =
2244                parse_ps_internal_knots(options, degree, default_internal)?;
2245            let periodic_axes = parse_periodic_axes(options, 1).map_err(|e| e.to_string())?;
2246            // Periodic margins still need enough basis functions to wrap, so
2247            // surface the per-axis degree reduction as a config error when the
2248            // user explicitly asked for a periodic-but-too-small basis. The
2249            // non-periodic path silently degrades degree to match mgcv.
2250            if periodic_axes[0] && effective_degree != degree {
2251                return Err(TermBuilderError::invalid_option(format!(
2252                    "periodic smooth: k={} too small for degree {}; expected k >= {}",
2253                    effective_degree + 1,
2254                    degree,
2255                    degree + 1
2256                ))
2257                .to_string());
2258            }
2259            if inferred && ds.values.nrows() <= 32 && smooth_coordinate_count >= 5 {
2260                n_knots = n_knots.min(1);
2261            }
2262            if inferred {
2263                let unique = unique_count_column(ds.values.column(c));
2264                let ceiling = ((unique as f64).cbrt() as usize).max(20);
2265                inference_notes.push(format!(
2266                    "Automatically set {} internal knots for smooth '{}' from {} unique values (rule: clamp(unique/4, 4..max(20, cbrt(unique))) = clamp(unique/4, 4..{})). Override with knots=... or k=....",
2267                    n_knots,
2268                    vars.join(","),
2269                    unique,
2270                    ceiling,
2271                ));
2272            }
2273            let boundary_conditions =
2274                if periodic_axes[0] && bspline_boundary_declares_periodic_axis(options) {
2275                    BSplineBoundaryConditions::default()
2276                } else {
2277                    parse_bspline_boundary_conditions(options).map_err(|e| e.to_string())?
2278                };
2279            let periods = parse_periods(options, &periodic_axes).map_err(|e| e.to_string())?;
2280            let origins =
2281                parse_period_origins(options, &periodic_axes).map_err(|e| e.to_string())?;
2282            let (knotspec, boundary) = if periodic_axes[0] {
2283                if !boundary_conditions.is_free() {
2284                    return Err(TermBuilderError::incompatible_config(
2285                        "periodic B-splines cannot also declare endpoint boundary conditions",
2286                    )
2287                    .to_string());
2288                }
2289                {
2290                    let (domain_start, p_value) = if periods[0].is_some() {
2291                        (origins[0].unwrap_or(minv), periods[0].unwrap())
2292                    } else {
2293                        parse_periodic_domain_1d(options, minv, maxv).map_err(|e| e.to_string())?
2294                    };
2295                    let domain_end = domain_start + p_value;
2296                    (
2297                        BSplineKnotSpec::PeriodicUniform {
2298                            data_range: (domain_start, domain_end),
2299                            num_basis: n_knots + effective_degree + 1,
2300                        },
2301                        OneDimensionalBoundary::Cyclic {
2302                            start: domain_start,
2303                            end: domain_end,
2304                        },
2305                    )
2306                }
2307            } else if type_opt == "cr" || type_opt == "cs" {
2308                // mgcv `bs="cr"`/`"cs"`: a natural cubic regression spline whose
2309                // basis is indexed by `k` values at quantile-placed knots (#1074),
2310                // NOT a B-spline knot vector. Match gam's `k=` convention by
2311                // requesting the same total basis size the B-spline arm would
2312                // produce (`n_knots` internal + degree + 1), floored at the cr
2313                // minimum of 3 knots. `cr` vs `cs` (shrinkage) is carried by the
2314                // `double_penalty` flag resolved below, which the cr builder reads.
2315                //
2316                // Cap that request to the covariate's data support (#1541): a cr
2317                // basis cannot place more value-knots than there are distinct
2318                // covariate values, so an unclamped `k` on a low-cardinality
2319                // predictor (binary indicator, 3-level ordinal, small count) used
2320                // to hard-fail in `select_cr_knots` instead of reducing like mgcv
2321                // and gam's tensor path. Below the cr minimum (a binary covariate)
2322                // degrade to the B-spline marginal the default `s(x, k=..)` basis
2323                // already fits on the same data — never a hard error.
2324                let k_cr = (n_knots + effective_degree + 1).max(CR_MIN_KNOTS);
2325                let knotspec = match capped_cr_marginal_knotspec(
2326                    ds.values.column(c),
2327                    k_cr,
2328                    &vars.join(","),
2329                    inference_notes,
2330                )? {
2331                    Some(cr_knotspec) => cr_knotspec,
2332                    None => resolve_nonperiodic_bspline_knotspec(
2333                        options,
2334                        ds.values.column(c),
2335                        (minv, maxv),
2336                        effective_degree,
2337                        n_knots,
2338                    )?,
2339                };
2340                (knotspec, parse_cyclic_boundary(options, minv, maxv)?)
2341            } else {
2342                (
2343                    resolve_nonperiodic_bspline_knotspec(
2344                        options,
2345                        ds.values.column(c),
2346                        (minv, maxv),
2347                        effective_degree,
2348                        n_knots,
2349                    )?,
2350                    parse_cyclic_boundary(options, minv, maxv)?,
2351                )
2352            };
2353            // mgcv `bs="cr"` does not shrink the linear null space; only `cs`
2354            // (and the gamfit-flavoured `bspline`/`ps`) do. Honour an explicit
2355            // `double_penalty=` either way.
2356            let double_penalty = if type_opt == "cr" {
2357                option_bool(options, "double_penalty").unwrap_or(false)
2358            } else {
2359                smooth_double_penalty
2360            };
2361            // Clamp the marginal difference penalty to `<= effective_degree`
2362            // so it stays well-defined when the per-axis degree was reduced
2363            // (mirrors the tensor margin path: `create_difference_penalty_matrix`
2364            // requires order < num_basis_functions).
2365            let penalty_order = option_usize(options, "penalty_order")
2366                .unwrap_or(DEFAULT_PENALTY_ORDER)
2367                .min(effective_degree);
2368            Ok(SmoothBasisSpec::BSpline1D {
2369                feature_col: c,
2370                spec: BSplineBasisSpec {
2371                    degree: effective_degree,
2372                    penalty_order,
2373                    knotspec,
2374                    double_penalty,
2375                    identifiability: BSplineIdentifiability::default(),
2376                    boundary,
2377                    boundary_conditions,
2378                },
2379            })
2380        }
2381        "tps" | "thinplate" | "thin-plate" => {
2382            validate_known_options(
2383                "thinplate",
2384                options,
2385                &[
2386                    SECONDARY_CENTER_CAP_OPTION,
2387                    "type",
2388                    "bs",
2389                    "by",
2390                    "length_scale",
2391                    "centers",
2392                    "k",
2393                    "basis_dim",
2394                    "basis-dim",
2395                    "basisdim",
2396                    "knots",
2397                    "include_intercept",
2398                    "double_penalty",
2399                    "by",
2400                    "id",
2401                    "__by_col",
2402                    "identifiability",
2403                    "by",
2404                    "periodic",
2405                    "cyclic",
2406                    "period",
2407                    "period_start",
2408                    "period_end",
2409                    "scale_dims",
2410                ],
2411            )?;
2412            let plan = plan_spatial_basis(
2413                ds.values.nrows(),
2414                cols.len(),
2415                CenterCountRequest::Default,
2416                DuchonNullspaceOrder::Linear,
2417                option_bool(options, "scale_dims").unwrap_or(false),
2418                policy,
2419            )
2420            .map_err(|e| e.to_string())?;
2421            // #1074: the mgcv-sized basis cap (`k = 10·3^(d-1)`) that used to live
2422            // here was DELETED. It masked the real defect — the n-scaling default
2423            // over-sizes a thin-plate field, producing a weakly-identified
2424            // two-penalty ρ-surface the outer optimizer stalls on (row-order
2425            // dependent, #1378), and surplus columns REML can't penalize away on
2426            // weak-signal fits. Capping the basis hid that stall instead of fixing
2427            // it. The default now uses the generic spatial center heuristic; the
2428            // root fix (a well-identified ρ-surface / optimizer that doesn't stall)
2429            // is tracked separately. Explicit `k`/`centers` still take full effect.
2430            let default_centers = plan.centers;
2431            let centers = parse_countwith_basis_alias(
2432                options,
2433                "centers",
2434                cap_default_spatial_centers(options, default_centers),
2435            )?;
2436            let center_strategy = if has_explicit_countwith_basis_alias(options, "centers") {
2437                spatial_center_strategy_for_dimension(centers, cols.len())
2438            } else {
2439                auto_spatial_center_strategy(centers, cols.len())
2440            };
2441            Ok(SmoothBasisSpec::ThinPlate {
2442                feature_cols: cols.to_vec(),
2443                spec: ThinPlateBasisSpec {
2444                    center_strategy,
2445                    periodic: parse_periodic_axes_option(options, cols.len())?,
2446                    // Sentinel: leave at 0.0 when the user didn't pass an
2447                    // explicit length_scale so `auto_init_length_scale_in_place`
2448                    // can replace it with a data-derived initialization. The
2449                    // old hard-coded 1.0 was the documented basin (see
2450                    // smooth.rs `auto_init_length_scale_in_place`) that the
2451                    // spatial optimizer could not escape, leaving TPS terms
2452                    // initialized off the data scale.
2453                    length_scale: option_f64(options, "length_scale").unwrap_or(0.0),
2454                    double_penalty: smooth_double_penalty,
2455                    identifiability: parse_spatial_identifiability(options)
2456                        .map_err(|e| e.to_string())?,
2457                    radial_reparam: None,
2458                },
2459                input_scales: None,
2460            })
2461        }
2462        "sphere" | "s2" | "sos" => {
2463            validate_known_options(
2464                "sphere",
2465                options,
2466                &[
2467                    "type",
2468                    "bs",
2469                    "by",
2470                    "centers",
2471                    "k",
2472                    "basis_dim",
2473                    "basis-dim",
2474                    "basisdim",
2475                    "knots",
2476                    "penalty_order",
2477                    "m",
2478                    "double_penalty",
2479                    "id",
2480                    "__by_col",
2481                    "kernel",
2482                    "method",
2483                    "radians",
2484                    "units",
2485                    "degree",
2486                    "l",
2487                    "max_degree",
2488                    "max-degree",
2489                ],
2490            )?;
2491            if cols.len() != 2 {
2492                return Err(format!(
2493                    "sphere smooth expects exactly two variables (lat, lon), got {}",
2494                    cols.len()
2495                ));
2496            }
2497            let radians = option_bool(options, "radians").unwrap_or_else(|| {
2498                options
2499                    .get("units")
2500                    .map(|u| u.eq_ignore_ascii_case("radian") || u.eq_ignore_ascii_case("radians"))
2501                    .unwrap_or(false)
2502            });
2503            // An explicit `degree`/`l`/`max_degree` names a spherical-harmonic
2504            // truncation, so with no explicit kernel/method it selects the
2505            // Harmonic construction (the Wahba kernel ignores `degree` and would
2506            // silently emit a 1-column kernel design). An explicit kernel/method
2507            // still wins.
2508            let degree_requested = options.contains_key("degree")
2509                || options.contains_key("l")
2510                || options.contains_key("max_degree")
2511                || options.contains_key("max-degree");
2512            let kernel = options
2513                .get("kernel")
2514                .or_else(|| options.get("method"))
2515                .map(|raw| strip_quotes(raw).trim().to_ascii_lowercase())
2516                .unwrap_or_else(|| {
2517                    if degree_requested {
2518                        "harmonic".to_string()
2519                    } else {
2520                        "sobolev".to_string()
2521                    }
2522                });
2523            let (method, wahba_kernel) = match kernel.as_str() {
2524                "sobolev" | "wahba" | "wahba_sobolev" | "wahba-sobolev" => {
2525                    (SphereMethod::Wahba, SphereWahbaKernel::Sobolev)
2526                }
2527                "pseudo" | "mgcv" | "sos" | "wahba_pseudo" | "wahba-pseudo" => {
2528                    (SphereMethod::Wahba, SphereWahbaKernel::Pseudo)
2529                }
2530                "harmonic" | "spherical_harmonic" | "spherical-harmonic" => {
2531                    (SphereMethod::Harmonic, SphereWahbaKernel::Sobolev)
2532                }
2533                other => {
2534                    return Err(format!(
2535                        "unsupported sphere kernel '{other}'; expected sobolev, pseudo, or harmonic"
2536                    ));
2537                }
2538            };
2539            let max_degree = if matches!(method, SphereMethod::Harmonic) {
2540                let degree =
2541                    option_usize_any(options, &["degree", "l", "max_degree", "max-degree"])
2542                        .or_else(|| option_usize(options, "centers"))
2543                        .or_else(|| {
2544                            option_usize_any(options, &["k", "basis_dim", "basis-dim", "basisdim"])
2545                                .and_then(|k| (1..=128).find(|&l| l * (l + 2) >= k))
2546                        })
2547                        .unwrap_or_else(|| default_spherical_harmonic_degree(ds.values.nrows()));
2548                if degree == 0 {
2549                    return Err("sphere smooth requires degree/max_degree >= 1".to_string());
2550                }
2551                if degree > 32 {
2552                    return Err(format!(
2553                        "sphere smooth max_degree={} is too large for the dense harmonic engine (limit 32)",
2554                        degree
2555                    ));
2556                }
2557                Some(degree)
2558            } else {
2559                None
2560            };
2561            let penalty_order = option_usize(options, "penalty_order")
2562                .or_else(|| option_usize(options, "m"))
2563                .unwrap_or(DEFAULT_PENALTY_ORDER);
2564            let center_strategy = if matches!(method, SphereMethod::Wahba) {
2565                let mut centers = parse_countwith_basis_alias(
2566                    options,
2567                    "centers",
2568                    default_num_centers(ds.values.nrows(), cols.len()),
2569                )?;
2570                if penalty_order >= 4 {
2571                    centers = centers.max(30);
2572                }
2573                CenterStrategy::FarthestPoint {
2574                    num_centers: centers,
2575                }
2576            } else {
2577                CenterStrategy::FarthestPoint { num_centers: 0 }
2578            };
2579            Ok(SmoothBasisSpec::Sphere {
2580                feature_cols: cols.to_vec(),
2581                spec: SphericalSplineBasisSpec {
2582                    center_strategy,
2583                    penalty_order,
2584                    double_penalty: smooth_double_penalty,
2585                    radians,
2586                    method,
2587                    max_degree,
2588                    wahba_kernel,
2589                    identifiability: SphericalSplineIdentifiability::CenterSumToZero,
2590                },
2591            })
2592        }
2593        "curvature" => {
2594            // Constant-curvature (M_κ) geodesic-kernel smooth (#944): the
2595            // κ-generic sibling of the intrinsic S² smooth above. The feature
2596            // columns are κ-stereographic chart coordinates; `kappa=` is the
2597            // fixed sectional curvature (default 0 = flat), and the geometry
2598            // comes from `geometry::constant_curvature::ConstantCurvature`.
2599            validate_known_options(
2600                "curvature",
2601                options,
2602                &[
2603                    "type",
2604                    "bs",
2605                    "by",
2606                    "centers",
2607                    "k",
2608                    "basis_dim",
2609                    "basis-dim",
2610                    "basisdim",
2611                    "knots",
2612                    "kappa",
2613                    "length_scale",
2614                    "double_penalty",
2615                    "id",
2616                    "__by_col",
2617                ],
2618            )?;
2619            let kappa = option_f64(options, "kappa").unwrap_or(0.0);
2620            if !kappa.is_finite() {
2621                return Err("curvature smooth requires a finite kappa".to_string());
2622            }
2623            let length_scale = option_f64(options, "length_scale").unwrap_or(0.0);
2624            if !length_scale.is_finite() || length_scale < 0.0 {
2625                return Err(format!(
2626                    "curvature smooth length_scale must be positive (or omitted for auto); got {length_scale}"
2627                ));
2628            }
2629            let centers = parse_countwith_basis_alias(
2630                options,
2631                "centers",
2632                default_num_centers(ds.values.nrows(), cols.len()),
2633            )?;
2634            if centers < 2 {
2635                return Err("curvature smooth requires at least 2 centers".to_string());
2636            }
2637            Ok(SmoothBasisSpec::ConstantCurvature {
2638                feature_cols: cols.to_vec(),
2639                spec: ConstantCurvatureBasisSpec {
2640                    center_strategy: CenterStrategy::FarthestPoint {
2641                        num_centers: centers,
2642                    },
2643                    kappa,
2644                    // 0.0 sentinel = κ-independent auto initialization in the
2645                    // basis builder (median chart center spacing, doubled).
2646                    length_scale,
2647                    // Curvature smooth defaults to NO double-penalty ridge
2648                    // (#1464): the curvature-blind ridge `I` absorbs the data fit
2649                    // independently of κ and rails the fitted curvature to the
2650                    // +chart bound (hyperbolic truth recovered as spherical). The
2651                    // RKHS Gram penalty is already full-rank PD, so the ridge adds
2652                    // no stability. Honour an EXPLICIT `double_penalty=` only.
2653                    double_penalty: option_bool(options, "double_penalty").unwrap_or(false),
2654                    identifiability: ConstantCurvatureIdentifiability::CenterSumToZero,
2655                },
2656            })
2657        }
2658        "measurejet" => {
2659            // Measure-jet spline: multiscale local-jet-residual energy of the
2660            // empirical measure. The feature columns are ambient coordinates
2661            // of data concentrated near an unknown low-dimensional set; the
2662            // geometry (centers, masses, scale band) is read off the measure
2663            // at build time — magic by default, every option optional.
2664            validate_known_options(
2665                "measurejet",
2666                options,
2667                &[
2668                    "type",
2669                    "bs",
2670                    "by",
2671                    "centers",
2672                    "k",
2673                    "basis_dim",
2674                    "basis-dim",
2675                    "basisdim",
2676                    "knots",
2677                    "s",
2678                    "alpha",
2679                    "tau",
2680                    "scales",
2681                    "length_scale",
2682                    "double_penalty",
2683                    "multiscale",
2684                    "learn_length_scale",
2685                    "id",
2686                    "__by_col",
2687                ],
2688            )?;
2689            let order_s = option_f64(options, "s").unwrap_or(0.0);
2690            // 0.0 = auto sentinel; explicit values must sit inside the
2691            // admissible order interval of the affine-jet (r = 2) energy.
2692            if !(order_s.is_finite() && (order_s == 0.0 || (order_s > 0.0 && order_s < 2.0))) {
2693                return Err(format!(
2694                    "measurejet smooth s must lie in (0, 2) (or be omitted for auto); got {order_s}"
2695                ));
2696            }
2697            // Default to the spec Default (α = 1, density-WEIGHTED Hessian
2698            // energy — the module-header default). The density-free α = 3/2
2699            // (q^{−2}) over-smooths low-intrinsic-dimension manifolds where the
2700            // local mass q is tiny and varies along the stratum (#1116:
2701            // 13×-worse-than-matérn on a 1-D curve in 3-D); α = 1's q^{−1} is
2702            // gentler and robust across intrinsic dimensions. An explicit
2703            // `alpha=` still overrides for full-dimensional density-free use.
2704            let alpha =
2705                option_f64(options, "alpha").unwrap_or(MeasureJetBasisSpec::default().alpha);
2706            if !alpha.is_finite() {
2707                return Err("measurejet smooth requires a finite alpha".to_string());
2708            }
2709            let tau0 = option_f64(options, "tau").unwrap_or(1e-3);
2710            if !(tau0.is_finite() && tau0 >= 0.0) {
2711                return Err(format!(
2712                    "measurejet smooth tau must be finite and nonnegative; got {tau0}"
2713                ));
2714            }
2715            let num_scales = option_usize(options, "scales").unwrap_or(0);
2716            let length_scale = option_f64(options, "length_scale").unwrap_or(0.0);
2717            if !length_scale.is_finite() || length_scale < 0.0 {
2718                return Err(format!(
2719                    "measurejet smooth length_scale must be positive (or omitted for auto); got {length_scale}"
2720                ));
2721            }
2722            let centers = parse_countwith_basis_alias(
2723                options,
2724                "centers",
2725                default_num_centers(ds.values.nrows(), cols.len()),
2726            )?;
2727            if centers < 3 {
2728                return Err("measurejet smooth requires at least 3 centers".to_string());
2729            }
2730            // Multiscale (per-scale spectral split + (α, lnτ) ψ dials + the
2731            // affine-preserving ridge) is an explicit opt-in (#1116): default
2732            // single-scale at any center count, the Duchon/Matérn footprint.
2733            let multiscale = option_bool(options, "multiscale").unwrap_or(false);
2734            // REML-learning the representer range ℓ is an explicit opt-in.
2735            // The stable default freezes ℓ at the auto/user value; the
2736            // design-moving coordinate is expensive and can overfit low-signal
2737            // surfaces when enabled implicitly.
2738            let learn_length_scale = option_bool(options, "learn_length_scale").unwrap_or(false);
2739            Ok(SmoothBasisSpec::MeasureJet {
2740                feature_cols: cols.to_vec(),
2741                spec: MeasureJetBasisSpec {
2742                    center_strategy: CenterStrategy::FarthestPoint {
2743                        num_centers: centers,
2744                    },
2745                    order_s,
2746                    alpha,
2747                    tau0,
2748                    num_scales,
2749                    // 0.0 sentinel = auto initialization in the basis builder
2750                    // (median nearest-center spacing).
2751                    length_scale,
2752                    double_penalty: smooth_double_penalty,
2753                    learn_length_scale,
2754                    multiscale,
2755                    identifiability: MeasureJetIdentifiability::CenterSumToZero,
2756                    frozen_quadrature: None,
2757                },
2758                input_scales: None,
2759            })
2760        }
2761        "matern" => {
2762            // Catch typos like `lengt_scale=` / `nyu=` / `centerz=` before
2763            // they get silently ignored and the user wonders why their
2764            // option had no effect. The matern() term accepts exactly
2765            // these options.
2766            validate_known_options(
2767                "matern",
2768                options,
2769                &[
2770                    SECONDARY_CENTER_CAP_OPTION,
2771                    "type",
2772                    "bs",
2773                    "by",
2774                    "nu",
2775                    "length_scale",
2776                    "centers",
2777                    "k",
2778                    "basis_dim",
2779                    "basis-dim",
2780                    "basisdim",
2781                    "knots",
2782                    "include_intercept",
2783                    "double_penalty",
2784                    "by",
2785                    "id",
2786                    "__by_col",
2787                    "identifiability",
2788                    "by",
2789                    "periodic",
2790                    "cyclic",
2791                    "period",
2792                    "period_start",
2793                    "period_end",
2794                    "scale_dims",
2795                ],
2796            )?;
2797            let plan = plan_spatial_basis(
2798                ds.values.nrows(),
2799                cols.len(),
2800                CenterCountRequest::Default,
2801                DuchonNullspaceOrder::Zero,
2802                option_bool(options, "scale_dims").unwrap_or(false),
2803                policy,
2804            )
2805            .map_err(|e| e.to_string())?;
2806            let centers = parse_countwith_basis_alias(
2807                options,
2808                "centers",
2809                cap_default_spatial_centers(
2810                    options,
2811                    default_matern_center_count(ds.values.nrows(), cols.len(), plan.centers),
2812                ),
2813            )?;
2814            let center_strategy = if has_explicit_countwith_basis_alias(options, "centers") {
2815                spatial_center_strategy_for_dimension(centers, cols.len())
2816            } else {
2817                auto_spatial_center_strategy(centers, cols.len())
2818            };
2819            let nu = parse_matern_nu(options.get("nu").map(String::as_str).unwrap_or("5/2"))?;
2820            // The exponential (ν = 1/2) Matérn kernel has a singular Laplacian
2821            // at zero in d ≥ 2, so the operator-collocation penalty machinery
2822            // hits a non-invertible matrix during fit. Surface the cause
2823            // up-front instead of letting the user see the generic
2824            // "Matrix conditioning issue detected" wrapper from PIRLS.
2825            if matches!(nu, MaternNu::Half) && cols.len() >= 2 {
2826                return Err(TermBuilderError::unsupported_feature(format!(
2827                    "matern() with nu=1/2 is not supported for d>=2 (got {} covariates): \
2828                     the exponential kernel's Laplacian is singular at center collisions, \
2829                     which makes the operator-collocation penalty non-invertible. \
2830                     Choose nu>=3/2 (e.g. nu=3/2 or the default nu=5/2) for multi-dimensional smooths.",
2831                    cols.len()
2832                ))
2833                .to_string());
2834            }
2835            let aniso_log_scales = if option_bool(options, "scale_dims").unwrap_or(false) {
2836                Some(vec![0.0; cols.len()])
2837            } else {
2838                None
2839            };
2840            Ok(SmoothBasisSpec::Matern {
2841                feature_cols: cols.to_vec(),
2842                spec: MaternBasisSpec {
2843                    center_strategy,
2844                    periodic: parse_periodic_axes_option(options, cols.len())?,
2845                    // Sentinel: leave at 0.0 when the user didn't pass an
2846                    // explicit length_scale so the planner's
2847                    // `auto_init_length_scale_in_place` can replace it with the
2848                    // SAME data-derived wiggly-side initialization the thin-plate
2849                    // path uses (`max_range / sqrt(n)`), then let the κ-optimizer
2850                    // refine from there.
2851                    //
2852                    // gam#1629: the previous `default_matern_length_scale` seeded
2853                    // the FULL data diameter — the maximally over-smoothed corner.
2854                    // Because that value is non-zero, the `0.0`-gated auto-init was
2855                    // a no-op for Matérn, so the κ-optimizer started in the flat
2856                    // over-smoothed basin and parked there, leaving high-frequency
2857                    // 2-D surfaces unresolved (truth-RMSE ~6× worse than
2858                    // thin-plate/tensor on identical data, and insensitive to `k`).
2859                    // Routing Matérn through the same `0.0` sentinel as thin-plate
2860                    // (see the ThinPlate branch above) starts REML in the resolving
2861                    // regime it can actually escape from.
2862                    length_scale: option_f64(options, "length_scale").unwrap_or(0.0),
2863                    nu,
2864                    include_intercept: option_bool(options, "include_intercept").unwrap_or(false),
2865                    double_penalty: smooth_double_penalty,
2866                    identifiability: parse_matern_identifiability(options)
2867                        .map_err(|e| e.to_string())?,
2868                    aniso_log_scales,
2869                    // Cold build: let the bootstrap-κ spectral test decide whether
2870                    // the double-penalty nullspace shrinkage survives; the freeze
2871                    // step then pins that decision into the FrozenTransform so the
2872                    // κ-optimizer's rebuilds keep the count invariant (gam#787/#860).
2873                    nullspace_shrinkage_survived: None,
2874                },
2875                input_scales: None,
2876            })
2877        }
        "duchon" => {
            // Duchon smooth: check the option names, resolve the
            // `(nullspace_order, power)` pair, pick a center count and center
            // strategy, then assemble the `DuchonBasisSpec`. Every fallible
            // step returns its error as a `String`.
            validate_known_options(
                "duchon",
                options,
                &[
                    SECONDARY_CENTER_CAP_OPTION,
                    "type",
                    "bs",
                    "by",
                    "length_scale",
                    "centers",
                    "k",
                    "basis_dim",
                    "basis-dim",
                    "basisdim",
                    "knots",
                    "power",
                    "p",
                    "nullspace_order",
                    "order",
                    "identifiability",
                    "by",
                    "periodic",
                    "cyclic",
                    "period",
                    "period_start",
                    "period_end",
                    "scale_dims",
                    "double_penalty",
                    "by",
                    "id",
                    "__by_col",
                ],
            )?;
            // `double_penalty` is in the known-option list above yet is always
            // refused here — presumably so the user gets this targeted message
            // rather than a generic unknown-option error.
            if options.contains_key("double_penalty") {
                return Err(TermBuilderError::incompatible_config(format!(
                    "Duchon smooth '{}' does not support double_penalty; the Duchon smoother already ships its native reproducing-norm penalty plus a null-space shrinkage ridge.",
                    vars.join(", ")
                ))
                .to_string());
            }
            let requested_nullspace_order = parse_duchon_order(options)?;
            // `Some(_)` selects the hybrid Duchon-Matérn kernel, `None` the
            // scale-free kernel; parsing is strict, so a failure is propagated.
            let length_scale = option_f64_strict(options, "length_scale")?;
            // Resolve `(nullspace_order, power)`. The default (magic) path is a
            // structural amplitude/slope/curvature smoother: an affine (`Linear`)
            // polynomial nullspace and spectral power `s = (d - 1)/2`, giving the
            // cubic kernel `r^3` in 1D. There is no nullspace-order escalation —
            // the structural cubic smoother is well-defined for every dimension.
            //
            // Explicit `power=...` honors the user's value verbatim against their
            // requested nullspace order; the kernel validator emits a precise
            // diagnostic for any inadmissible combination. In the scale-free
            // (non-hybrid) regime fractional powers are admitted and threaded as
            // `f64`. The hybrid Duchon-Matérn kernel (`length_scale=Some`) is
            // restricted to integer powers.
            let (nullspace_order, power) = match parse_duchon_power_policy(options)? {
                DuchonPowerPolicy::Explicit(req_power) => {
                    // Hybrid kernel + fractional power is the one explicit
                    // combination rejected at this layer.
                    if length_scale.is_some() && req_power.fract() != 0.0 {
                        return Err(TermBuilderError::incompatible_config(format!(
                            "hybrid Duchon-Matern smooth '{}' (length_scale=...) requires an integer power, got power={}; \
                             drop length_scale to use the scale-free structural kernel with a fractional power.",
                            vars.join(", "),
                            req_power,
                        ))
                        .to_string());
                    }
                    (requested_nullspace_order, req_power)
                }
                DuchonPowerPolicy::CubicStructuralDefault => {
                    // Magic cubic rule (REQUEST-LAYER default): no explicit power ⇒
                    // affine null space + fractional spectral power s = (d-1)/2, i.e.
                    // the Duchon kernel φ(r)=r³ in every dimension. An EXPLICIT
                    // `power=0` is handled above and is honored as the s=0 Duchon
                    // kernel (r²·log r ≡ the thin-plate kernel in even d) — the magic
                    // default lives here, not in the basis builder.
                    match length_scale {
                        None => crate::basis::duchon_cubic_default(cols.len()),
                        Some(_) => {
                            // The hybrid Matérn-blended kernel (`length_scale=Some`)
                            // requires an INTEGER spectral power `s` (the partial-
                            // fraction split `1/(ρ^{2p}(κ²+ρ²)^s)` is only defined for
                            // integer `s`). The fractional cubic default `s=(d-1)/2` is
                            // a half-integer for even `d`, and the basis builder's
                            // `power_as_usize` maps a NON-integer to `0` (not its
                            // floor) — so for even `d ≥ 4` the realized kernel has
                            // `2(p+s) = 2p = 4 ≤ d`, which is non-finite at the origin
                            // and crashes the fit (historically a non-finite
                            // eigendecomposition; now a fit-time validation error).
                            //
                            // Rather than emit the fractional cubic and let it truncate
                            // into an inadmissible kernel, resolve the SMALLEST
                            // admissible integer `(nullspace, s)` at the requested
                            // nullspace order, honoring the collocation order of the
                            // default operator penalties (mass + tension ⇒ D1). This
                            // recovers the canonical thin-plate smoothness order
                            // `m = p + s = ⌊d/2⌋ + 1` for the hybrid kernel and agrees
                            // with the fractional cubic default for odd `d` (where the
                            // collocation floor already forces `s = (d-1)/2`).
                            let max_op = crate::basis::duchon_max_active_operator_derivative_order(
                                &DuchonOperatorPenaltySpec::default(),
                            );
                            let (ns, s) = crate::basis::resolve_duchon_orders(
                                cols.len(),
                                requested_nullspace_order,
                                max_op,
                                length_scale,
                            );
                            // The resolved integer power is widened to `f64` so both
                            // match arms yield the same tuple type.
                            (ns, s as f64)
                        }
                    }
                }
            };
            // The planner is always asked for the default center count; an
            // explicit `centers=`/alias is applied afterwards when parsing
            // `requested_centers`.
            let plan = plan_spatial_basis(
                ds.values.nrows(),
                cols.len(),
                CenterCountRequest::Default,
                nullspace_order,
                option_bool(options, "scale_dims").unwrap_or(false),
                policy,
            )
            .map_err(|e| e.to_string())?;
            let centers_explicit = has_explicit_countwith_basis_alias(options, "centers");
            let requested_centers = parse_countwith_basis_alias(
                options,
                "centers",
                cap_default_spatial_centers(options, plan.centers),
            )?;
            // Number of polynomial null-space columns implied by the resolved
            // order: 1 for `Zero`, d + 1 for `Linear`, otherwise whatever
            // `duchon_nullspace_dimension` reports for the given degree.
            let polynomial_cols = match nullspace_order {
                DuchonNullspaceOrder::Zero => 1,
                DuchonNullspaceOrder::Linear => cols.len() + 1,
                DuchonNullspaceOrder::Degree(degree) => {
                    crate::basis::duchon_nullspace_dimension(cols.len(), degree)
                }
            };
            // The basis dimension must strictly exceed the null-space column
            // count; note this check runs on the requested count, before the
            // small-sample adjustment below.
            if requested_centers <= polynomial_cols {
                return Err(TermBuilderError::incompatible_config(format!(
                    "Duchon smooth '{}' requested basis dimension {} but order={:?} in {}D needs {} polynomial null-space columns; choose centers/k > {}",
                    vars.join(", "),
                    requested_centers,
                    nullspace_order,
                    cols.len(),
                    polynomial_cols,
                    polynomial_cols,
                ))
                .to_string());
            }
            // Small-sample adjustment: only when the count was not given
            // explicitly, the data has at most 32 rows and
            // `smooth_coordinate_count` is at least 5, raise the count to at
            // least `polynomial_cols + 4`.
            let mut centers = requested_centers;
            if !centers_explicit && ds.values.nrows() <= 32 && smooth_coordinate_count >= 5 {
                centers = centers.max(polynomial_cols + 4);
            }
            // Explicit and automatic counts use different center-strategy
            // selectors.
            let center_strategy = if centers_explicit {
                spatial_center_strategy_for_dimension(centers, cols.len())
            } else {
                auto_spatial_center_strategy(centers, cols.len())
            };
            // `scale_dims=true` seeds one zero log-scale per feature column;
            // otherwise no per-axis scales are attached.
            let aniso_log_scales = if option_bool(options, "scale_dims").unwrap_or(false) {
                Some(vec![0.0; cols.len()])
            } else {
                None
            };
            // The default is the full Hilbert scale (curvature `Primary` + trend
            // ridge + mass + tension); REML deselects what the data don't support.
            let operator_penalties = DuchonOperatorPenaltySpec::default();
            // For a 1-D periodic Duchon with no EXPLICIT period, anchor the wrap
            // to the covariate DATA range rather than letting the basis builder
            // derive it from the (k-subsampled) center span. The center span is a
            // strict subset of the data and undershoots the true period, seaming
            // the curve (f(0) ≠ f(2π)); the data range is the caller's actual
            // domain. Honors any explicit `period=` (parse_periodic_axes_option
            // already threaded it) and leaves multi-D / non-periodic untouched.
            let mut periodic = parse_periodic_axes_option(options, cols.len())?;
            if cols.len() == 1
                && let Some(axes) = periodic.as_mut()
                && axes.len() == 1
                && axes[0].is_none()
            {
                let (minv, maxv) = col_minmax(ds.values.column(cols[0]))?;
                // A degenerate range (max <= min) leaves the period unset.
                if maxv > minv {
                    axes[0] = Some(maxv - minv);
                }
            }
            Ok(SmoothBasisSpec::Duchon {
                feature_cols: cols.to_vec(),
                spec: DuchonBasisSpec {
                    center_strategy,
                    periodic,
                    length_scale,
                    power,
                    nullspace_order,
                    identifiability: parse_spatial_identifiability(options)
                        .map_err(|e| e.to_string())?,
                    aniso_log_scales,
                    operator_penalties,
                    // A single-covariate smooth derives its boundary from the
                    // options together with the column's min/max; with more
                    // than one covariate the boundary is always `Open`.
                    boundary: if cols.len() == 1 {
                        let c = cols[0];
                        let (minv, maxv) = col_minmax(ds.values.column(c))?;
                        parse_cyclic_boundary(options, minv, maxv)?
                    } else {
                        OneDimensionalBoundary::Open
                    },
                    radial_reparam: None,
                },
                input_scales: None,
            })
        }
3083        "tensor" | "te" | "ti" | "t2" => {
3084            validate_known_options(
3085                "tensor",
3086                options,
3087                &[
3088                    "type",
3089                    "bs",
3090                    "by",
3091                    "k",
3092                    "basis_dim",
3093                    "basis-dim",
3094                    "basisdim",
3095                    "knot_placement",
3096                    "knot-placement",
3097                    "knotplacement",
3098                    "degree",
3099                    "penalty_order",
3100                    "double_penalty",
3101                    "periodic",
3102                    "cyclic",
3103                    "period",
3104                    "periods",
3105                    "period_start",
3106                    "period_end",
3107                    "origin",
3108                    "origins",
3109                    "period_origin",
3110                    "period-origin",
3111                    "domain_origin",
3112                    "boundary",
3113                    "bc",
3114                    "identifiability",
3115                    "id",
3116                    "__by_col",
3117                ],
3118            )?;
3119            if cols.len() < 2 {
3120                return Err(TermBuilderError::incompatible_config(format!(
3121                    "tensor smooth expects at least 2 variables, got {}",
3122                    cols.len()
3123                ))
3124                .to_string());
3125            }
3126            let dim = cols.len();
3127
3128            // Tensor-product contract (#1082). `te(x1, x2, ...)` ALWAYS builds a
3129            // genuine anisotropic tensor product of per-margin bases (the arm
3130            // below), exactly as mgcv's `te()` does — one smoothing parameter per
3131            // margin, a marginal-Kronecker-sum penalty, and the bilinear null
3132            // space left unpenalized under the default `select = FALSE`. A margin
3133            // vector `bs=c('tp','tp')` requests a thin-plate FUNCTION SPACE per
3134            // axis; the tensor realizes each axis as a 1-D penalized B-spline
3135            // margin spanning that same per-axis space (tp/ps/cr/bs/cc all share
3136            // it). We deliberately do NOT silently swap the requested tensor for a
3137            // single multi-D ISOTROPIC thin-plate radial smooth (`s(x,y,bs='tp')`):
3138            // that is a different model — one isotropic smoothing parameter, no
3139            // per-margin anisotropy — and substituting it while the user wrote a
3140            // tensor formula is dishonest. A user who genuinely wants the isotropic
3141            // radial smooth asks for it directly with `s(x1, x2, bs='tp')`.
3142            // Per-margin basis vector (`bs=c('tp','tp')` / `bs=['ps','cr']`):
3143            // validate each requested margin is a penalized-spline basis that
3144            // the tensor product realizes as a 1-D B-spline margin. mgcv's
3145            // `tp`/`ps`/`cr`/`bs`/`cc` margins are all penalized splines over
3146            // the same per-axis function space, so a B-spline margin recovers
3147            // the same tensor smoothing space; genuinely different margin kinds
3148            // (e.g. adaptive `ad`, random `re`) are rejected loudly rather than
3149            // silently substituted.
3150            if let Some(raw) = options.get("bs").or_else(|| options.get("type"))
3151                && bs_selector_is_vector(raw)
3152            {
3153                let per_margin = parse_option_list(raw);
3154                if per_margin.len() != dim {
3155                    return Err(TermBuilderError::invalid_option(format!(
3156                        "tensor smooth per-margin bs vector has {} entries but the smooth has {} margins",
3157                        per_margin.len(),
3158                        dim
3159                    ))
3160                    .to_string());
3161                }
3162                for (axis, margin_bs) in per_margin.iter().enumerate() {
3163                    if !tensor_margin_bs_is_supported(margin_bs) {
3164                        return Err(TermBuilderError::unsupported_feature(format!(
3165                            "tensor smooth margin {axis} basis '{margin_bs}' is not a supported penalized-spline margin; \
3166                             tensor margins accept tp/tps/ps/bs/cr/cc"
3167                        ))
3168                        .to_string());
3169                    }
3170                }
3171            }
3172            let periodic_axes = parse_tensor_periodic_axes(options, dim)?;
3173            validate_tensor_boundary_tokens(options, dim)?;
3174            let periods_opt = parse_periods(options, &periodic_axes)?;
3175            let origins_opt = parse_period_origins(options, &periodic_axes)?;
3176            let degree = option_usize(options, "degree").unwrap_or(DEFAULT_BSPLINE_DEGREE);
3177            let penalty_order =
3178                option_usize(options, "penalty_order").unwrap_or(if degree > 1 { 2 } else { 1 });
3179            let (mut k_list, k_inferred) = parse_tensor_k_list(options, cols, ds)?;
3180            if ds.values.nrows() <= 32 && smooth_coordinate_count >= 5 {
3181                for k in &mut k_list {
3182                    *k = (*k).min(degree + 2);
3183                }
3184            }
3185            if k_inferred {
3186                inference_notes.push(format!(
3187                    "Automatically set per-margin basis sizes {:?} for tensor smooth '{}' \
3188                     (dimension-aware tensor budget: total ∏k kept near the mgcv-te default \
3189                     and within the data support, distributed geometrically across margins and \
3190                     capped per margin by each column's resolution). \
3191                     Override with k=<int> or k=[k0,k1,...].",
3192                    k_list,
3193                    vars.join(",")
3194                ));
3195            }
3196            // Per-axis requested marginal basis family. mgcv's `te()`/`ti()`
3197            // default marginal basis is the cubic regression spline (`cr`), and
3198            // the te_3d quality gap (#1074) is precisely the marginal-basis
3199            // resolution at small `k`: a `cr` margin places k value-knots at
3200            // data quantiles (finer interior resolution under natural boundary
3201            // constraints) where the cubic B-spline margin has only
3202            // `k-degree-1` interior knots. Resolve each axis to either an
3203            // explicit per-margin `bs` (vector `bs=c('cr','ps')`), a single
3204            // scalar `bs`, or the unset default — and route
3205            // `cr`/`cs`/unset/`tp`/`tps` margins through the natural cubic
3206            // regression builder (`NaturalCubicRegression` knotspec), keeping
3207            // explicit `ps`/`bs`/`bspline` on the B-spline margin.
3208            let per_axis_bs: Vec<Option<String>> =
3209                match options.get("bs").or_else(|| options.get("type")) {
3210                    Some(raw) if bs_selector_is_vector(raw) => {
3211                        let list = parse_option_list(raw);
3212                        (0..dim).map(|a| list.get(a).cloned()).collect()
3213                    }
3214                    Some(raw) => {
3215                        let scalar = raw
3216                            .trim()
3217                            .trim_matches('"')
3218                            .trim_matches('\'')
3219                            .to_ascii_lowercase();
3220                        vec![Some(scalar); dim]
3221                    }
3222                    None => vec![None; dim],
3223                };
3224            // A margin is realized as a natural cubic regression spline when it
3225            // is the (unset) mgcv default, an explicit `cr`/`cs`, or a
3226            // `tp`/`tps` (same per-axis penalized-spline space). Explicit
3227            // B-spline-family margins (`ps`/`bs`/`bspline`/`p-spline`) keep the
3228            // open B-spline margin.
3229            let margin_wants_cr = |bs: &Option<String>| -> bool {
3230                matches!(
3231                    bs.as_deref(),
3232                    None | Some("cr") | Some("cs") | Some("tp") | Some("tps")
3233                )
3234            };
3235            let mut margins: Vec<BSplineBasisSpec> = Vec::with_capacity(dim);
3236            let mut emitted_periods: Vec<Option<f64>> = Vec::with_capacity(dim);
3237            for axis in 0..dim {
3238                let c = cols[axis];
3239                let (data_min, data_max) = col_minmax(ds.values.column(c))?;
3240                // mgcv reduces a tensor margin's basis dimension to what its data
3241                // can support: a cr or B-spline margin cannot place more value
3242                // knots / basis functions than there are DISTINCT covariate
3243                // values on that axis. Without this cap an explicit `k` on a
3244                // low-cardinality margin — e.g. the binary `badh ∈ {0,1}` in
3245                // `te(age, badh, k=5)` — hard-failed in `select_cr_knots` ("cubic
3246                // regression spline with k=5 requires at least 5 distinct values,
3247                // got 2") instead of degrading to the 2-function (linear) margin
3248                // mgcv builds there. The auto-`k` path already caps per margin via
3249                // `heuristic_tensor_margin_knots`; mirror that for explicit `k`.
3250                // The cap propagates correctly: every per-axis quantity below
3251                // (effective degree, knot set, penalty order) is derived from
3252                // `k_axis`, and the marginal basis size is read from the resulting
3253                // knot spec — never from `k_list`. Floor at 2 so a margin still
3254                // carries at least a linear basis (tensor margins require k >= 2).
3255                let k_requested = k_list[axis];
3256                let n_distinct_axis = unique_count_column(ds.values.column(c));
3257                let k_axis = k_requested.min(n_distinct_axis).max(2);
3258                if k_axis < k_requested {
3259                    log::info!(
3260                        "tensor smooth: margin axis {axis} requested k={k_requested}, but the \
3261                         covariate has only {n_distinct_axis} distinct value(s); reducing this \
3262                         margin to k={k_axis} (mgcv-style data-support cap on the per-axis basis)."
3263                    );
3264                }
3265                // Per-axis effective spline degree. The B-spline basis with `k`
3266                // functions is well-defined for any `degree <= k - 1`; mgcv's
3267                // `te(...)` exploits this so a binary tensor margin
3268                // (`k=2` → linear basis) or a ternary margin (`k=3` → quadratic)
3269                // can coexist with a smoother continuous margin under one
3270                // shared `degree=` request. We mirror that: if the caller
3271                // explicitly asks for `k < degree + 1`, drop the degree on
3272                // THAT axis only to the largest feasible spline, and track the
3273                // penalty order so the marginal difference penalty stays
3274                // well-defined (`order < num_basis_functions` is required by
3275                // `create_difference_penalty_matrix`). Periodic axes still
3276                // need enough basis functions to wrap; reject k there.
3277                if k_axis < 2 {
3278                    return Err(TermBuilderError::invalid_option(format!(
3279                        "tensor smooth: k[{axis}]={k_axis} too small; tensor margins require k >= 2"
3280                    ))
3281                    .to_string());
3282                }
3283                if periodic_axes[axis] && k_axis < degree + 1 {
3284                    return Err(TermBuilderError::invalid_option(format!(
3285                        "tensor smooth: periodic axis {axis} requires k >= {} for degree {degree}, got k={k_axis}",
3286                        degree + 1
3287                    ))
3288                    .to_string());
3289                }
3290                let effective_degree = degree.min(k_axis - 1).max(1);
3291                let effective_penalty_order = penalty_order.min(effective_degree);
3292                // A `cc`/`cp`/`cyclic` per-margin basis declares periodicity
3293                // without necessarily supplying a `period=`: mgcv's `bs="cc"`
3294                // wraps at the covariate's observed data range. Mirror the 1-D
3295                // cyclic fallback (`parse_periodic_domain_1d`) here so a bare
3296                // `te(x, z, bs=c('cc','cc'))` wraps each margin on its own
3297                // [min, max] span instead of hard-erroring (#1752).
3298                let margin_is_cc = matches!(
3299                    canonicalize_smooth_type(per_axis_bs[axis].as_deref().unwrap_or("")),
3300                    "cc" | "cp" | "cyclic"
3301                );
3302                let (knotspec, boundary, axis_period) = if periodic_axes[axis] {
3303                    // A `cc`/`cp`/`cyclic` per-margin basis declares periodicity
3304                    // without necessarily supplying a `period=`; in that case wrap
3305                    // at the covariate's observed [min, max] span, mirroring the
3306                    // 1-D cyclic fallback (`parse_periodic_domain_1d`) so a bare
3307                    // `te(x, z, bs=c('cc','cc'))` wraps each margin on its own
3308                    // range instead of hard-erroring (#1752). An axis made
3309                    // periodic by an explicit `periodic=`/`boundary=` selector
3310                    // (not a cyclic margin basis) still requires an explicit
3311                    // `period=`: a data-derived period there is a sample-dependent
3312                    // off-by-ε seam and is not inferred.
3313                    let (domain_start, period_value) = match periods_opt[axis] {
3314                        Some(period_value) => {
3315                            if !period_value.is_finite() || period_value <= 0.0 {
3316                                return Err(format!(
3317                                    "tensor smooth axis {axis}: period must be a positive finite value, got {period_value}"
3318                                ));
3319                            }
3320                            (origins_opt[axis].unwrap_or(data_min), period_value)
3321                        }
3322                        None if margin_is_cc => {
3323                            let span = data_max - data_min;
3324                            if !span.is_finite() || span <= 0.0 {
3325                                return Err(format!(
3326                                    "tensor smooth axis {axis}: cyclic margin requires a positive \
3327                                     observed data range to derive its period, got [{data_min}, {data_max}]"
3328                                ));
3329                            }
3330                            (origins_opt[axis].unwrap_or(data_min), span)
3331                        }
3332                        None => {
3333                            return Err(format!(
3334                                "tensor smooth axis {axis} is periodic but requires an explicit \
3335                                 period: pass period=<value> (scalar) or period=[..., <value>, ...]. \
3336                                 Deriving the period from the observed data range is sample-dependent \
3337                                 (off-by-ε seam), so it is not inferred."
3338                            ));
3339                        }
3340                    };
3341                    let domain_end = domain_start + period_value;
3342                    (
3343                        BSplineKnotSpec::PeriodicUniform {
3344                            data_range: (domain_start, domain_end),
3345                            num_basis: k_axis,
3346                        },
3347                        OneDimensionalBoundary::Cyclic {
3348                            start: domain_start,
3349                            end: domain_end,
3350                        },
3351                        Some(period_value),
3352                    )
3353                } else if margin_wants_cr(&per_axis_bs[axis]) && k_axis >= 3 {
3354                    // mgcv `te()`/`ti()` default cr margin: place exactly
3355                    // `k_axis` Lancaster–Salkauskas value-knots at data
3356                    // quantiles. The cr basis dimension equals the knot count,
3357                    // so this reproduces the requested per-margin `k` directly.
3358                    // A natural cubic regression spline needs at least 3 knots
3359                    // (one interior); a `k_axis < 3` margin (e.g. a binary
3360                    // tensor axis requesting a linear margin) falls through to
3361                    // the B-spline branch below, exactly as before #1074 — mgcv
3362                    // likewise does not build a `cr` margin below k=3.
3363                    let cr_knots =
3364                        crate::basis::select_cr_knots(ds.values.column(c), k_axis)
3365                            .map_err(|e| e.to_string())?;
3366                    (
3367                        BSplineKnotSpec::NaturalCubicRegression { knots: cr_knots },
3368                        OneDimensionalBoundary::Open,
3369                        None,
3370                    )
3371                } else {
3372                    // `num_internal_knots = k - degree - 1` reproduces the
3373                    // requested basis size exactly when degree was reduced for
3374                    // a low-cardinality margin; keep the legacy `.max(1)`
3375                    // floor on the un-reduced path so the existing knot
3376                    // geometry is unchanged whenever the user already passed
3377                    // k >= degree + 1.
3378                    let num_internal_knots = if effective_degree < degree {
3379                        k_axis.saturating_sub(effective_degree + 1)
3380                    } else {
3381                        k_axis.saturating_sub(degree + 1).max(1)
3382                    };
3383                    let knotspec = match parse_knot_placement(options)? {
3384                        crate::basis::BSplineKnotPlacement::Uniform => BSplineKnotSpec::Generate {
3385                            data_range: (data_min, data_max),
3386                            num_internal_knots,
3387                        },
3388                        crate::basis::BSplineKnotPlacement::Quantile => {
3389                            crate::basis::auto_knot_vector_1d_quantile(
3390                                ds.values.column(c),
3391                                num_internal_knots,
3392                                effective_degree,
3393                            )
3394                            .map_err(|e| e.to_string())?;
3395                            BSplineKnotSpec::Automatic {
3396                                num_internal_knots: Some(num_internal_knots),
3397                                placement: crate::basis::BSplineKnotPlacement::Quantile,
3398                            }
3399                        }
3400                    };
3401                    (knotspec, OneDimensionalBoundary::Open, None)
3402                };
3403                // A `cr` margin fixes cubic regression geometry; the cr builder
3404                // reads only the knot set + `double_penalty`. Enable null-space
3405                // shrinkage for an explicit `cs` margin. B-spline margins keep
3406                // the resolved effective degree / penalty order with no extra
3407                // null-space penalty (mgcv `select = FALSE` tensor default).
3408                let is_cr_margin =
3409                    matches!(knotspec, BSplineKnotSpec::NaturalCubicRegression { .. });
3410                let margin_double_penalty =
3411                    is_cr_margin && matches!(per_axis_bs[axis].as_deref(), Some("cs"));
3412                margins.push(BSplineBasisSpec {
3413                    degree: effective_degree,
3414                    penalty_order: effective_penalty_order,
3415                    knotspec,
3416                    double_penalty: margin_double_penalty,
3417                    identifiability: BSplineIdentifiability::None,
3418                    boundary,
3419                    boundary_conditions: BSplineBoundaryConditions::default(),
3420                });
3421                emitted_periods.push(axis_period);
3422            }
3423            // #1593: canonicalize the margin order so a tensor smooth is invariant
3424            // to the typed order of its covariates. `te(x, z)` and `te(z, x)` span
3425            // the IDENTICAL tensor-product space under the identical per-margin
3426            // penalty family, but the design is the Khatri–Rao product
3427            // `B_first ⊙ B_second`, so the typed order permutes the design columns
3428            // (and the per-margin penalty blocks `S_first⊗I`, `I⊗S_second`). That
3429            // permutation is a pure relabelling in exact arithmetic — REML is
3430            // invariant to it — yet it reorders the penalized normal-equation / REML
3431            // eigen/Cholesky linear algebra, and the resulting sub-ULP differences
3432            // route the outer λ optimizer to a different terminal point in te's flat
3433            // REML valley (the over-smoothed margin rails to the ρ bound while the
3434            // other lands on a materially different λ̂). So the shipped surface
3435            // drifted ~2–6 % of range with a cosmetic swap of the covariate order
3436            // (the #1378 row-permutation / #1456 rotation flat-valley gauge family).
3437            // Sorting the margins by their source feature-column index makes the same
3438            // physical model build the identical problem regardless of typed order,
3439            // so the fit — and every prediction rebuilt from the resolved spec — is
3440            // genuinely order-invariant. `ti`/`t2` share this arm and become exactly
3441            // invariant too (they were already ~1e-5 by centring each margin
3442            // separately; canonicalization makes the swap bit-identical).
3443            let canon_cols: Vec<usize> = {
3444                let mut perm: Vec<usize> = (0..dim).collect();
3445                perm.sort_by_key(|&a| cols[a]);
3446                if perm.iter().enumerate().any(|(i, &a)| i != a) {
3447                    margins = perm.iter().map(|&a| margins[a].clone()).collect();
3448                    emitted_periods = perm.iter().map(|&a| emitted_periods[a]).collect();
3449                }
3450                perm.iter().map(|&a| cols[a]).collect()
3451            };
3452            let any_periodic = emitted_periods.iter().any(|p| p.is_some());
3453            let periods_vec = if any_periodic {
3454                emitted_periods
3455            } else {
3456                Vec::new()
3457            };
3458            // Tensor smooths (`te`/`ti`/`t2`) must match mgcv's DEFAULT
3459            // `select = FALSE`: the joint null space of the per-margin
3460            // penalties — the bilinear, low-order interaction directions that
3461            // no marginal roughness operator can see — is left UNPENALIZED.
3462            // mgcv only adds a null-space shrinkage penalty there under the
3463            // opt-in `select = TRUE` (which gam exposes as `double_penalty`).
3464            //
3465            // The general smooth default (`smooth_double_penalty`, true) is
3466            // calibrated for 1-D `s()` terms; carrying it into tensors silently
3467            // shrinks the genuinely-present bilinear interaction signal, so
3468            // REML places positive weight on the extra ridge and systematically
3469            // OVER-SMOOTHS the recovered surface relative to mgcv's plain
3470            // `te`/`ti` (gam#700/#701/#702/#703). Default tensors to no extra
3471            // null-space penalty; an explicit user `double_penalty=`/`select=`
3472            // still wins.
3473            let tensor_double_penalty = option_bool(options, "double_penalty").unwrap_or(false);
3474            Ok(SmoothBasisSpec::TensorBSpline {
3475                feature_cols: canon_cols,
3476                spec: TensorBSplineSpec {
3477                    marginalspecs: margins,
3478                    periods: periods_vec,
3479                    double_penalty: tensor_double_penalty,
3480                    identifiability: parse_tensor_identifiability(options, kind)?,
3481                    // `t2` selects mgcv's separable (Wood, Scheipl & Faraway
3482                    // 2013) decomposition. It can arrive either as the `t2(...)`
3483                    // function form (`SmoothKind::T2`) or as a `type="t2"` /
3484                    // `bs="t2"` option on an `s(...)`/`te(...)` term, in which
3485                    // case `kind` is *not* `T2` but the resolved type string is
3486                    // "t2". Keying only off `kind` silently aliased the option
3487                    // form to `te`'s Kronecker-sum penalty (gam#1185); key off
3488                    // the resolved type string as well so both routes build the
3489                    // separable penalty.
3490                    penalty_decomposition: if matches!(kind, SmoothKind::T2)
3491                        || type_opt.as_str() == "t2"
3492                    {
3493                        TensorBSplinePenaltyDecomposition::Separable
3494                    } else {
3495                        TensorBSplinePenaltyDecomposition::MarginalKroneckerSum
3496                    },
3497                },
3498            })
3499        }
3500        "pca" => {
3501            validate_known_options(
3502                "pca",
3503                options,
3504                &[
3505                    "type",
3506                    "bs",
3507                    "by",
3508                    "k",
3509                    "basis_dim",
3510                    "basis-dim",
3511                    "basisdim",
3512                    "lazy_path",
3513                    "path",
3514                    "pca_basis_path",
3515                    "chunk_size",
3516                    "smooth_penalty",
3517                    "centered",
3518                    "double_penalty",
3519                    "id",
3520                    "__by_col",
3521                ],
3522            )?;
3523            let path = options
3524                .get("lazy_path")
3525                .or_else(|| options.get("pca_basis_path"))
3526                .or_else(|| options.get("path"))
3527                .map(|raw| PathBuf::from(strip_quotes(raw)));
3528            let Some(path) = path else {
3529                return Err(TermBuilderError::incompatible_config(
3530                    "pca smooth requires lazy_path=... on the formula path",
3531                )
3532                .to_string());
3533            };
3534            let k = option_usize_any(options, &["k", "basis_dim", "basis-dim", "basisdim"])
3535                .unwrap_or(0);
3536            let chunk_size = option_usize(options, "chunk_size").unwrap_or(DEFAULT_PCA_CHUNK_SIZE);
3537            Ok(SmoothBasisSpec::Pca {
3538                feature_cols: cols.to_vec(),
3539                basis_matrix: Array2::<f64>::zeros((cols.len(), k)),
3540                centered: option_bool(options, "centered").unwrap_or(true),
3541                smooth_penalty: option_f64(options, "smooth_penalty").unwrap_or(1.0),
3542                center_mean: None,
3543                pca_basis_path: Some(path),
3544                chunk_size,
3545            })
3546        }
3547        other => Err(TermBuilderError::unsupported_feature(format!(
3548            "unsupported smooth type '{other}'"
3549        ))
3550        .to_string()),
3551    }
3552}
3553
3554/// Initialise per-axis anisotropic log-scales on eligible spatial smooth specs.
3555pub fn enable_scale_dimensions(spec: &mut TermCollectionSpec) {
3556    for smooth in spec.smooth_terms.iter_mut() {
3557        // A multi-axis thin-plate term cannot carry per-axis anisotropy on its
3558        // single curvature penalty, so `scale_dimensions` was historically a
3559        // silent no-op for `bs="tp"` (gam#1676). Rewrite it to the
3560        // mathematically-equivalent anisotropic s=0 Duchon spline first; the
3561        // Duchon arm below then sees an already-seeded `aniso_log_scales` and
3562        // leaves it untouched.
3563        promote_thin_plate_for_scale_dimensions(&mut smooth.basis);
3564        match &mut smooth.basis {
3565            SmoothBasisSpec::Matern {
3566                feature_cols,
3567                spec: matern,
3568                ..
3569            } => {
3570                if matern.aniso_log_scales.is_none() {
3571                    let d = feature_cols.len();
3572                    matern.aniso_log_scales = Some(vec![0.0; d]);
3573                }
3574            }
3575            SmoothBasisSpec::Duchon {
3576                feature_cols,
3577                spec: duchon,
3578                ..
3579            } => {
3580                if duchon.aniso_log_scales.is_none() {
3581                    let d = feature_cols.len();
3582                    duchon.aniso_log_scales = Some(vec![0.0; d]);
3583                }
3584            }
3585            _ => {}
3586        }
3587    }
3588}
3589
3590/// Rewrite a multi-axis thin-plate term into the mathematically-equivalent
3591/// anisotropic s=0 Duchon spline so that `scale_dimensions` genuinely engages
3592/// (gam#1676).
3593///
3594/// ## Why a rewrite rather than a new field on the TPS builder
3595///
3596/// A canonical thin-plate regression spline carries a *single* curvature
3597/// penalty — the exact `∫|Dᵐ f|²` reproducing-kernel Gram. That penalty has no
3598/// per-axis structure to make one direction more or less relevant than another,
3599/// so per-axis anisotropy (`scale_dimensions`) cannot be expressed on it. The
3600/// flag was therefore a silent no-op for `bs="tp"` while it engaged for
3601/// `duchon()`/`matern()`.
3602///
3603/// The thin-plate kernel `r^{2m−d}` (the `r²·log r` log-case in even `d`) is
3604/// *exactly* the s=0 Duchon kernel (`DuchonBasisSpec::power = 0`,
3605/// `length_scale = None`) at the matching polynomial null-space order
3606/// `m = thin_plate_penalty_order(d)`. The Duchon polyharmonic family already
3607/// carries the per-axis tension ARD that `scale_dimensions` requests: its
3608/// isotropic first-order roughness penalty `Σ‖∇f‖²` splits into `d` directional
3609/// penalties `Σ(∂f/∂x_a)²`, each with its own REML `λ_a`
3610/// (`duchon_operator_penalty_candidates`). So the well-posed *anisotropic
3611/// thin-plate spline is the anisotropic s=0 Duchon spline*. Rewriting to that
3612/// representation reuses the battle-tested Duchon anisotropy / ψ-derivative /
3613/// freeze / predict machinery instead of duplicating it onto the TPS metadata
3614/// path, and keeps the polyharmonic family internally consistent. The codebase
3615/// already promotes infeasible-`k` TPS to Duchon for the same reason (the
3616/// canonical TPS single curvature penalty cannot deliver a requested
3617/// capability); per-axis anisotropy is another such capability.
3618///
3619/// This fires *only* when the user opts into `scale_dimensions`; the default
3620/// thin-plate path (`scale_dimensions` off) is left bit-for-bit unchanged.
3621/// A 1-D thin-plate term is left untouched — anisotropy is meaningless on a
3622/// single axis (its `Σ η = 0` contrast vector is empty), exactly as for a 1-D
3623/// Matérn/Duchon term.
3624fn promote_thin_plate_for_scale_dimensions(basis: &mut SmoothBasisSpec) {
3625    let SmoothBasisSpec::ThinPlate {
3626        feature_cols,
3627        spec,
3628        input_scales,
3629    } = &*basis
3630    else {
3631        return;
3632    };
3633    let d = feature_cols.len();
3634    if d <= 1 {
3635        return;
3636    }
3637    // m = thin_plate_penalty_order(d) is the TPS penalty order; the Duchon
3638    // null-space order naming is `Zero → m=1`, `Linear → m=2`,
3639    // `Degree(g) → m=g+1`, so the s=0 Duchon kernel exponent
3640    // `2(p+s) − d = 2m − d` reproduces the TPS kernel exactly.
3641    let m = thin_plate_penalty_order(d);
3642    let nullspace_order = match m {
3643        0 | 1 => DuchonNullspaceOrder::Zero,
3644        2 => DuchonNullspaceOrder::Linear,
3645        _ => DuchonNullspaceOrder::Degree(m - 1),
3646    };
3647    let duchon_spec = DuchonBasisSpec {
3648        center_strategy: spec.center_strategy.clone(),
3649        periodic: spec.periodic.clone(),
3650        // Pure, scale-free Duchon — the thin-plate kernel has no length scale
3651        // (a global TPS kernel scale is non-identifiable once REML learns the
3652        // smoothing penalty: gam#718/#721/#731/#732). The per-axis relevance
3653        // the user asked for is carried by the tension-ARD `λ_a`, not a κ axis.
3654        length_scale: None,
3655        // s = 0  ⇒  thin-plate kernel `r^{2m−d}`.
3656        power: 0.0,
3657        nullspace_order,
3658        identifiability: spec.identifiability.clone(),
3659        // All-zero geometry seed sentinel: `auto_seed_aniso_contrasts` resolves
3660        // it from the (standardized) knot cloud, and the per-axis tension split
3661        // engages on `aniso.is_some()`.
3662        aniso_log_scales: Some(vec![0.0; d]),
3663        operator_penalties: DuchonOperatorPenaltySpec::default(),
3664        boundary: OneDimensionalBoundary::Open,
3665        radial_reparam: None,
3666    };
3667    let feature_cols = feature_cols.clone();
3668    let input_scales = input_scales.clone();
3669    // All borrows of `*basis` (the `&*basis` destructure above) end with the
3670    // clones on the two preceding lines, so the reassignment is sound.
3671    *basis = SmoothBasisSpec::Duchon {
3672        feature_cols,
3673        spec: duchon_spec,
3674        input_scales,
3675    };
3676}
3677
3678// ---------------------------------------------------------------------------
3679// Data-aware helpers
3680// ---------------------------------------------------------------------------
3681
3682pub fn spatial_center_strategy_for_dimension(num_centers: usize, d: usize) -> CenterStrategy {
3683    if d <= 3 {
3684        // In low-dimensional spatial smooths, an explicit `k` is a resolution
3685        // request rather than a request for marginal quantile-midpoint centers.
3686        // Use deterministic maximin geometry so Matérn/GP and Duchon REML see a
3687        // well-resolved native kernel block with small fill distance instead of
3688        // compensating for holes or endpoint under-resolution by over-smoothing
3689        // low-noise signals (#504).
3690        CenterStrategy::FarthestPoint { num_centers }
3691    } else {
3692        default_spatial_center_strategy(num_centers, d)
3693    }
3694}
3695
3696pub fn col_minmax(col: ArrayView1<'_, f64>) -> Result<(f64, f64), String> {
3697    let min = col.iter().fold(f64::INFINITY, |a, &b| a.min(b));
3698    let max = col.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
3699    if !min.is_finite() || !max.is_finite() {
3700        return Err(TermBuilderError::degenerate_data(
3701            "non-finite data encountered while inferring knot range",
3702        )
3703        .to_string());
3704    }
3705    if (max - min).abs() < 1e-12 {
3706        Ok((min, min + 1e-6))
3707    } else {
3708        Ok((min, max))
3709    }
3710}
3711
3712pub fn unique_count_column(col: ArrayView1<'_, f64>) -> usize {
3713    use std::collections::HashSet;
3714    let mut set = HashSet::<u64>::with_capacity(col.len());
3715    for &v in col {
3716        let norm = if v == 0.0 { 0.0 } else { v };
3717        set.insert(norm.to_bits());
3718    }
3719    set.len().max(1)
3720}
3721
/// Minimum knot count for a natural cubic regression spline.
///
/// `select_cr_knots` places one value-knot per basis function and needs at
/// least an interior knot, so the sparsest representable cr basis is
/// `{const, linear, curvature}` at three knots. Below this a cr spline is not
/// constructible and the caller must degrade to the linear B-spline marginal.
///
/// `capped_cr_marginal_knotspec` uses this as the threshold under which it
/// returns `None` instead of a cr knot spec.
pub(crate) const CR_MIN_KNOTS: usize = 3;
3728
3729/// Build a cubic-regression marginal knot spec capped to the covariate's data
3730/// support, mgcv-style.
3731///
3732/// A `cr`/`cs`/`sz` marginal places exactly one basis function per value-knot,
3733/// so `select_cr_knots` cannot place more knots than the covariate has DISTINCT
3734/// values — it `bail`s with "cubic regression spline with k=N requires at least
3735/// N distinct values" otherwise. An unclamped `k` on an ordinary low-cardinality
3736/// covariate (a binary indicator, a 3-level ordinal/Likert score, a small count)
3737/// therefore hard-failed the whole fit instead of reducing the basis the way
3738/// mgcv — and gam's own tensor-margin path (996f829d7, `term_builder.rs:2986` /
3739/// the `k_axis >= 3` cr gate at `:3047`) — do. This is the univariate / factor-
3740/// smooth sibling of that tensor cap (#1541, #1542).
3741///
3742/// Returns:
3743/// - `Some(NaturalCubicRegression { .. })` with `k = min(k_requested, n_distinct)`
3744///   value-knots when the data supports a cr spline (`n_distinct >= CR_MIN_KNOTS`).
3745///   A cr basis of exactly `n_distinct` knots is full-rank for the data — it can
3746///   represent any per-distinct-value structure (e.g. 3 arbitrary group means on
3747///   a ternary covariate) — so the cap never costs recoverable signal.
3748/// - `None` when `n_distinct < CR_MIN_KNOTS` (a binary covariate): too few
3749///   distinct values for ANY cr spline, so the caller degrades to the linear
3750///   B-spline marginal — exactly what the default `s(x, k=..)` basis already
3751///   builds on the same data, and what the tensor path's `< 3` branch builds.
3752///
3753/// `inference_notes` records any reduction so the user sees that `k` was capped
3754/// (mgcv emits a warning in the same situation).
3755fn capped_cr_marginal_knotspec(
3756    col: ArrayView1<'_, f64>,
3757    k_cr_requested: usize,
3758    label: &str,
3759    inference_notes: &mut Vec<String>,
3760) -> Result<Option<BSplineKnotSpec>, String> {
3761    let n_distinct = unique_count_column(col);
3762    let k_cr = k_cr_requested.min(n_distinct);
3763    if k_cr < CR_MIN_KNOTS {
3764        inference_notes.push(format!(
3765            "Smooth '{label}': cubic-regression ('cr'/'cs'/'sz') basis requested k={k_cr_requested}, \
3766             but the covariate has only {n_distinct} distinct value(s) — too few to support a cubic \
3767             regression spline (needs >= {CR_MIN_KNOTS} distinct values). Degraded to the linear \
3768             B-spline marginal the default basis builds on the same data."
3769        ));
3770        return Ok(None);
3771    }
3772    if k_cr < k_cr_requested {
3773        inference_notes.push(format!(
3774            "Smooth '{label}': cubic-regression ('cr'/'cs'/'sz') basis reduced from k={k_cr_requested} \
3775             to k={k_cr} to match the covariate's {n_distinct} distinct value(s) (mgcv-style \
3776             data-support cap; a cr basis cannot place more value-knots than the data has)."
3777        ));
3778    }
3779    let cr_knots = crate::basis::select_cr_knots(col, k_cr).map_err(|e| e.to_string())?;
3780    Ok(Some(BSplineKnotSpec::NaturalCubicRegression {
3781        knots: cr_knots,
3782    }))
3783}
3784
3785/// Smallest number of distinct covariate values seen within any single group
3786/// of `group_col`. For a factor smooth this is the resolution that bounds the
3787/// marginal basis: a group with `m` distinct covariate values can only inform
3788/// `m` basis coefficients, so a marginal richer than that interpolates the
3789/// group instead of estimating a penalized trend. Bits are compared exactly so
3790/// integer-valued covariates (days, dose levels) collapse to their true count.
3791fn min_per_group_unique_count(
3792    feature_col: ArrayView1<'_, f64>,
3793    group_col: ArrayView1<'_, f64>,
3794) -> usize {
3795    use std::collections::{HashMap, HashSet};
3796    let mut per_group: HashMap<u64, HashSet<u64>> = HashMap::new();
3797    for (xi, gi) in feature_col.iter().zip(group_col.iter()) {
3798        let xnorm = if *xi == 0.0 { 0.0 } else { *xi };
3799        let gnorm = if *gi == 0.0 { 0.0 } else { *gi };
3800        per_group
3801            .entry(gnorm.to_bits())
3802            .or_default()
3803            .insert(xnorm.to_bits());
3804    }
3805    per_group
3806        .values()
3807        .map(|s| s.len())
3808        .min()
3809        .unwrap_or(1)
3810        .max(1)
3811}
3812
3813/// Default internal-knot count for an *additive* univariate smooth, derived
3814/// from the column's unique-value count.
3815///
3816/// The basis dimension is `internal_knots + degree + 1`, so the cap below maps
3817/// to a default cubic basis of ~12 functions — deliberately close to mgcv's
3818/// univariate default (`k = 10`). A penalized smooth controls its wiggliness
3819/// through the *penalty*, not the basis size: REML/LAML shrinks a too-rich
3820/// basis toward the null, but it cannot do so cleanly when the basis is so
3821/// over-sized that the design becomes weakly identified. Growing the basis with
3822/// `n` (the old `n^(1/3)`-ceilinged `unique/4` rule, which pinned to 20 internal
3823/// knots ⇒ a 24-function basis for any column with ≥80 unique values) therefore
3824/// *hurts* recovery on finite, weak-signal fits: a 4-smooth additive model on
3825/// n=120 asks for ~92 coefficients, the outer optimizer stalls on the resulting
3826/// flat two-penalty (range + null-space) REML surface, and the truth leaks into
3827/// surplus columns the penalty can't shrink away (gam#1680; the same defect was
3828/// documented for thin-plate fields in gam#1074). A k-sweep on the #1680 design
3829/// confirms a basis of ~10–15 recovers truth at RMSE ≈ 0.12 while the old
3830/// 24-function default lands at ≈ 0.39 (~3× worse) — *whether or not* the
3831/// covariates are collinear, so this is basis over-richness, not collinearity.
3832///
3833/// The cap is flat in `n`: a user who genuinely needs a wigglier fit raises `k`
3834/// explicitly (mgcv's contract — opt *in* to more flexibility), and the SPEC
3835/// requires the default to allow recovering the null rather than forcing the
3836/// user to opt out of overfitting. The 4-knot floor stays put because we still
3837/// need enough basis functions to fit a non-trivial smooth at all, and the
3838/// `unique/4` growth below the cap keeps small/sparse columns (n ≤ 32, where
3839/// `unique/4 ≤ 8`) on exactly their previous knot count.
3840pub fn heuristic_knots_for_column(col: ArrayView1<'_, f64>) -> usize {
3841    /// Default cubic basis ≈ `MAX_DEFAULT_INTERNAL_KNOTS + degree + 1` = 12
3842    /// functions, matching mgcv's lean univariate default.
3843    const MAX_DEFAULT_INTERNAL_KNOTS: usize = 8;
3844    let unique = unique_count_column(col);
3845    (unique / 4).clamp(4, MAX_DEFAULT_INTERNAL_KNOTS)
3846}
3847
3848/// Per-margin basis sizes for a tensor-product smooth (`te`/`ti`/`t2`).
3849///
3850/// The 1-D heuristic [`heuristic_knots_for_column`] is calibrated for an
3851/// *additive* margin: a well-resolved column asks for the lean univariate
3852/// default (≈12 basis functions, the mgcv-like cap of 8 internal knots; see
3853/// gam#1680), which is sensible for a single `s(x)` term.
3854/// A tensor product, however, multiplies the per-margin sizes:
3855/// `p = ∏_d k_d`. Reusing the 1-D rule per margin makes `p` explode with the
3856/// tensor dimension — a 3-D `te(x,y,z)` at the 1-D ceiling of 12/margin is
3857/// `12³ ≈ 1728` columns, and every REML evaluation pays an O(p³) dense
3858/// penalty reparameterization (the full-tensor sum-to-zero constraint is not
3859/// Kronecker-factorable), turning model selection over tensor candidates into
3860/// a multi-minute single-threaded stall (gam#813). It also requests far more
3861/// coefficients than the data can identify whenever `p ≫ n`.
3862///
3863/// mgcv's `te(...)` uses a small per-margin default (`k = 5`, i.e. `5^d`).
3864/// We match that spirit while staying data-adaptive: budget the *total* tensor
3865/// column count `p_target` and distribute it geometrically across the margins
3866/// so `∏ k_d ≈ p_target`, never asking a margin for more functions than its
3867/// own unique values (and the data set) can support.
3868fn heuristic_tensor_margin_knots(cols: &[usize], ds: &Dataset) -> Vec<usize> {
3869    let d = cols.len().max(1);
3870    let degree = DEFAULT_BSPLINE_DEGREE;
3871    let min_k = degree + 2; // smallest margin that carries a difference penalty
3872    let n = ds.values.nrows();
3873
3874    // Per-margin 1-D ceiling: never request more basis functions than the
3875    // margin's own resolution (unique values) supports. This caps each axis
3876    // independently before the joint budget is applied.
3877    let per_margin_cap: Vec<usize> = cols
3878        .iter()
3879        .map(|&c| heuristic_knots_for_column(ds.values.column(c)).max(min_k))
3880        .collect();
3881
3882    // Total-basis budget. A tensor with ∏k ≫ n coefficients is rank-deficient
3883    // and pure REML cost; cap the product at a generous fraction of n while
3884    // honoring mgcv's small default for the common small-d case. The budget
3885    // grows with n but the geometric split below keeps each margin modest.
3886    //   d=2 → up to ~7²=49 (mgcv-`te`-like), d=3 → ~5³=125, larger d shrinks
3887    // per-margin further so the product never blows past the data support.
3888    let mgcv_like_per_margin = match d {
3889        2 => 7usize,
3890        3 => 5usize,
3891        _ => 4usize,
3892    };
3893    let mgcv_like_total = (mgcv_like_per_margin as f64).powi(d as i32);
3894    let data_budget = (n as f64) * 0.8;
3895    let p_target = mgcv_like_total
3896        .max(min_k.pow(d as u32) as f64)
3897        .min(data_budget);
3898
3899    // Geometric per-margin target so ∏k ≈ p_target, then clamp each margin to
3900    // its own 1-D resolution cap and the difference-penalty floor.
3901    let geo_per_margin = p_target.powf(1.0 / d as f64).round() as usize;
3902    let unclamped: Vec<usize> = per_margin_cap
3903        .iter()
3904        .map(|&cap| geo_per_margin.clamp(min_k, cap))
3905        .collect();
3906
3907    // The per-margin clamps can pull some axes below `geo_per_margin` (a
3908    // low-resolution column), leaving headroom in the joint budget. Redistribute
3909    // that headroom to the margins that can still grow, so the realized ∏k stays
3910    // close to p_target instead of systematically under-shooting it.
3911    let mut k_list = unclamped;
3912    loop {
3913        let product: f64 = k_list.iter().map(|&k| k as f64).product();
3914        if product >= p_target {
3915            break;
3916        }
3917        // Grow the axis with the most remaining headroom (cap − current),
3918        // breaking ties toward the largest cap. Stop when none can grow.
3919        let Some(idx) = k_list
3920            .iter()
3921            .zip(per_margin_cap.iter())
3922            .enumerate()
3923            .filter(|&(_, (k, cap))| k < cap)
3924            .max_by_key(|&(_, (k, cap))| (cap - k, *cap))
3925            .map(|(i, _)| i)
3926        else {
3927            break;
3928        };
3929        k_list[idx] += 1;
3930    }
3931    k_list
3932}
3933
/// Default center count heuristic; a thin public wrapper that forwards both
/// arguments unchanged to [`default_num_centers`].
///
/// NOTE(review): `n` is presumably the number of observations and `d` the
/// covariate dimension — confirm against `default_num_centers`.
pub fn heuristic_centers(n: usize, d: usize) -> usize {
    default_num_centers(n, d)
}
3937
3938// ---------------------------------------------------------------------------
3939// Smooth option parsers
3940// ---------------------------------------------------------------------------
3941
3942fn parse_endpoint_side(
3943    value: &str,
3944    context: &str,
3945) -> Result<BSplineEndpointBoundaryCondition, String> {
3946    match value.trim().to_ascii_lowercase().as_str() {
3947        "" | "none" | "open" | "unconstrained" | "free" => {
3948            Ok(BSplineEndpointBoundaryCondition::Free)
3949        }
3950        "clamped" | "clamp" | "zero_derivative" | "zero-derivative" => {
3951            Ok(BSplineEndpointBoundaryCondition::Clamped)
3952        }
3953        "anchored" | "anchor" | "zero" | "zero_value" | "zero-value" => {
3954            Ok(BSplineEndpointBoundaryCondition::Anchored { value: 0.0 })
3955        }
3956        other => Err(format!(
3957            "unsupported {context} boundary condition '{other}'; expected free, clamped, or anchored"
3958        )),
3959    }
3960}
3961
3962fn boundary_anchor_value(
3963    options: &BTreeMap<String, String>,
3964    side: &str,
3965    fallback: Option<f64>,
3966) -> Option<f64> {
3967    [
3968        format!("anchor_{side}"),
3969        format!("{side}_anchor"),
3970        format!("anchor-value-{side}"),
3971    ]
3972    .iter()
3973    .find_map(|key| option_f64(options, key))
3974    .or(fallback)
3975}
3976
3977fn apply_anchor_value(
3978    cond: BSplineEndpointBoundaryCondition,
3979    value: Option<f64>,
3980) -> BSplineEndpointBoundaryCondition {
3981    match cond {
3982        BSplineEndpointBoundaryCondition::Anchored { .. } => {
3983            BSplineEndpointBoundaryCondition::Anchored {
3984                value: value.unwrap_or(0.0),
3985            }
3986        }
3987        other => other,
3988    }
3989}
3990
3991fn parse_bspline_boundary_conditions(
3992    options: &BTreeMap<String, String>,
3993) -> Result<BSplineBoundaryConditions, String> {
3994    let fallback_anchor = option_f64(options, "anchor")
3995        .or_else(|| option_f64(options, "anchor_value"))
3996        .or_else(|| option_f64(options, "value"));
3997    let global_boundary_conditions = options
3998        .get("boundary_conditions")
3999        .or_else(|| options.get("bc"));
4000    let mut boundary_conditions = BSplineBoundaryConditions::default();
4001
4002    if let Some(raw_boundary_conditions) = global_boundary_conditions {
4003        let cond = parse_endpoint_side(raw_boundary_conditions, "boundary_conditions")?;
4004        let side = options
4005            .get("side")
4006            .map(|s| s.trim().to_ascii_lowercase())
4007            .unwrap_or_else(|| "both".to_string());
4008        match side.as_str() {
4009            "both" | "all" | "endpoints" => {
4010                boundary_conditions.left = cond;
4011                boundary_conditions.right = cond;
4012            }
4013            "left" | "start" | "lower" => boundary_conditions.left = cond,
4014            "right" | "end" | "upper" => boundary_conditions.right = cond,
4015            other => {
4016                return Err(format!(
4017                    "unsupported B-spline boundary side '{other}'; expected left, right, or both"
4018                ));
4019            }
4020        }
4021    }
4022
4023    if let Some(raw) = options
4024        .get("bc_left")
4025        .or_else(|| options.get("left_bc"))
4026        .or_else(|| options.get("bc_start"))
4027        .or_else(|| options.get("start_bc"))
4028    {
4029        boundary_conditions.left = parse_endpoint_side(raw, "left endpoint")?;
4030    }
4031    if let Some(raw) = options
4032        .get("bc_right")
4033        .or_else(|| options.get("right_bc"))
4034        .or_else(|| options.get("bc_end"))
4035        .or_else(|| options.get("end_bc"))
4036    {
4037        boundary_conditions.right = parse_endpoint_side(raw, "right endpoint")?;
4038    }
4039
4040    boundary_conditions.left = apply_anchor_value(
4041        boundary_conditions.left,
4042        boundary_anchor_value(options, "left", fallback_anchor),
4043    );
4044    boundary_conditions.right = apply_anchor_value(
4045        boundary_conditions.right,
4046        boundary_anchor_value(options, "right", fallback_anchor),
4047    );
4048
4049    // Non-zero anchors require an affine offset term that the current basis
4050    // builder does not synthesize (see `build_bspline_basis_1d` in
4051    // src/terms/basis.rs). Surface the rejection at parse time with the side
4052    // and value in the diagnostic, instead of letting the value-only error
4053    // emerge deep inside the basis builder where the user has no context
4054    // about which anchor key (`anchor`, `left_anchor`, `right_anchor`, …)
4055    // routed into which endpoint.
4056    reject_nonzero_anchor("left", boundary_conditions.left)?;
4057    reject_nonzero_anchor("right", boundary_conditions.right)?;
4058
4059    Ok(boundary_conditions)
4060}
4061
4062fn reject_nonzero_anchor(side: &str, cond: BSplineEndpointBoundaryCondition) -> Result<(), String> {
4063    if let BSplineEndpointBoundaryCondition::Anchored { value } = cond {
4064        if value.abs() > 1e-12 {
4065            return Err(format!(
4066                "non-zero {side} anchor {value} requires an affine offset term that is not yet supported; only anchored value 0 is accepted at parse time"
4067            ));
4068        }
4069    }
4070    Ok(())
4071}
4072
4073/// Resolve the requested internal-knot count and effective spline degree for
4074/// a 1-D penalized B-spline smooth. This mirrors the tensor-margin per-axis
4075/// degree-reduction policy: a 1-D B-spline basis with `k` functions
4076/// is well-defined for any `degree <= k - 1`, so an explicit
4077/// `s(x, bs="ps", k=3)` with default `degree=3` is interpreted as the
4078/// largest representable spline (`effective_degree = k - 1 = 2`, quadratic)
4079/// rather than rejected. The `penalty_order` carried by the caller must be
4080/// clamped to `<= effective_degree` so the marginal difference penalty
4081/// stays well-defined; the returned `effective_degree` makes that explicit.
4082///
4083/// Mirrors the tensor margin treatment in the `te(...)` builder so a
4084/// standalone smooth, a factor smooth, and a tensor margin all interpret
4085/// "small k" the same way.
4086fn parse_ps_internal_knots(
4087    options: &BTreeMap<String, String>,
4088    degree: usize,
4089    default_internal_knots: usize,
4090) -> Result<(usize, bool, usize), String> {
4091    const MIN_EXPRESSIVE_INTERNAL_KNOTS: usize = 2;
4092    // Strict variants: reject `k=-1`, `k=1.5`, `knots=-2` etc. with a
4093    // focused error instead of silently dropping the value and using the
4094    // default. Lenient `option_usize` / `option_usize_any` silently swallow
4095    // unparseable values, which leaves the user thinking they configured
4096    // something when they did not.
4097    // A list-valued `knots=[...]` carries explicit internal positions, not a
4098    // count; it is consumed by `parse_explicit_internal_knots`. Treat it as
4099    // "count not specified" here so the strict integer parse does not reject
4100    // the bracketed value (the Provided path ignores the returned count).
4101    let knots_internal = if knots_option_is_list(options) {
4102        None
4103    } else {
4104        option_usize_strict(options, "knots")?
4105    };
4106    let basis_dim = option_usize_any_strict(options, &["k", "basis_dim", "basis-dim", "basisdim"])?;
4107    if knots_internal.is_some() && basis_dim.is_some() {
4108        return Err(TermBuilderError::incompatible_config(
4109            "ps/bspline smooth: specify either knots=<internal_knots> or k=<basis_dim> (not both)",
4110        )
4111        .to_string());
4112    }
4113    if let Some(k) = basis_dim {
4114        if k < 2 {
4115            return Err(TermBuilderError::invalid_option(format!(
4116                "ps/bspline smooth: k={} too small; B-spline basis requires k >= 2",
4117                k
4118            ))
4119            .to_string());
4120        }
4121        // `degree <= k - 1` is required for the B-spline basis to be
4122        // well-defined; reduce on this axis only when the user asked for
4123        // a smaller k than the cubic default supports. This matches mgcv's
4124        // behaviour (e.g. `s(x, bs="ps", k=3)` becomes a quadratic basis)
4125        // and the per-axis reduction the tensor builder already does.
4126        let effective_degree = degree.min(k - 1).max(1);
4127        let num_internal_knots = if effective_degree < degree {
4128            // Reproduce the requested basis size exactly when degree was
4129            // reduced for a low-cardinality axis: num_basis = k.
4130            k.saturating_sub(effective_degree + 1)
4131        } else {
4132            (k - degree - 1).max(MIN_EXPRESSIVE_INTERNAL_KNOTS)
4133        };
4134        Ok((num_internal_knots, false, effective_degree))
4135    } else {
4136        Ok((
4137            knots_internal.unwrap_or(default_internal_knots),
4138            knots_internal.is_none(),
4139            degree,
4140        ))
4141    }
4142}
4143
/// Whether the `knots` option holds a *list* literal rather than a scalar
/// count.
///
/// mgcv's `knots=` accepts both: a single integer is an internal-knot count, a
/// vector is explicit internal knot positions. The decision rests purely on
/// the opening wrapper of the trimmed value — `[`, `c(`, `C(`, or `(` — so a
/// bare `knots=5` keeps its historical count meaning. Returns `false` when the
/// key is absent.
fn knots_option_is_list(options: &BTreeMap<String, String>) -> bool {
    const LIST_OPENERS: [&str; 4] = ["[", "c(", "C(", "("];
    match options.get("knots") {
        Some(raw) => {
            let trimmed = raw.trim();
            LIST_OPENERS
                .iter()
                .any(|opener| trimmed.starts_with(*opener))
        }
        None => false,
    }
}
4158
4159/// Parse `knots=[k0, k1, ...]` (or `c(...)` / `(...)`) into explicit internal
4160/// knot positions. Returns `Ok(None)` when `knots` is absent or a scalar count
4161/// (handled by [`parse_ps_internal_knots`]); `Ok(Some(positions))` when it is a
4162/// non-empty numeric list; and an error for an empty or unparseable list.
4163fn parse_explicit_internal_knots(
4164    options: &BTreeMap<String, String>,
4165) -> Result<Option<Vec<f64>>, String> {
4166    if !knots_option_is_list(options) {
4167        return Ok(None);
4168    }
4169    let raw = options
4170        .get("knots")
4171        .expect("knots_option_is_list implies the key is present");
4172    let tokens = split_list_option(raw);
4173    if tokens.is_empty() {
4174        return Err(TermBuilderError::invalid_option(format!(
4175            "knots={raw} is an empty list; supply at least one internal knot position \
4176             (e.g. knots=[0.2, 0.5, 0.8]) or a scalar count (e.g. knots=8)"
4177        ))
4178        .to_string());
4179    }
4180    let mut positions = Vec::with_capacity(tokens.len());
4181    for tok in &tokens {
4182        let value = parse_numeric_expr(tok).map_err(|err| {
4183            TermBuilderError::invalid_option(format!(
4184                "knots list entry '{tok}' is not a numeric position: {err}"
4185            ))
4186            .to_string()
4187        })?;
4188        positions.push(value);
4189    }
4190    Ok(Some(positions))
4191}
4192
4193/// Resolve the `knot_placement=` option for an automatically generated knot
4194/// vector. Accepts `"uniform"` (the default, equal spacing on the data range)
4195/// and `"quantile"` (interior knots at empirical data quantiles, better for
4196/// skewed covariates). Unknown values are rejected so typos do not silently
4197/// fall back to uniform.
4198fn parse_knot_placement(
4199    options: &BTreeMap<String, String>,
4200) -> Result<crate::basis::BSplineKnotPlacement, String> {
4201    use crate::basis::BSplineKnotPlacement;
4202    match options
4203        .get("knot_placement")
4204        .or_else(|| options.get("knot-placement"))
4205        .or_else(|| options.get("knotplacement"))
4206    {
4207        None => Ok(BSplineKnotPlacement::Uniform),
4208        Some(raw) => match raw
4209            .trim()
4210            .trim_matches('"')
4211            .trim_matches('\'')
4212            .to_ascii_lowercase()
4213            .as_str()
4214        {
4215            "uniform" | "even" | "equal" => Ok(BSplineKnotPlacement::Uniform),
4216            "quantile" | "quantiles" | "data" | "empirical" => Ok(BSplineKnotPlacement::Quantile),
4217            other => Err(TermBuilderError::invalid_option(format!(
4218                "knot_placement={other} is not recognised; expected \"uniform\" or \"quantile\""
4219            ))
4220            .to_string()),
4221        },
4222    }
4223}
4224
4225/// Build the non-periodic 1D B-spline knot spec for the `ps`/`bspline` and
4226/// factor-smooth marginal paths, honoring (in priority order):
4227///   1. `knots=[...]` explicit internal positions  → [`BSplineKnotSpec::Provided`]
4228///   2. `knot_placement="quantile"`                 → [`BSplineKnotSpec::Automatic`]
4229///   3. uniform generation                          → [`BSplineKnotSpec::Generate`]
4230///
4231/// `data` is the covariate column (used to clamp explicit positions to the
4232/// observed range and to drive quantile placement); `n_knots` is the resolved
4233/// internal-knot count from [`parse_ps_internal_knots`] used for the automatic
4234/// strategies.
4235fn resolve_nonperiodic_bspline_knotspec(
4236    options: &BTreeMap<String, String>,
4237    data: ArrayView1<'_, f64>,
4238    data_range: (f64, f64),
4239    degree: usize,
4240    n_knots: usize,
4241) -> Result<BSplineKnotSpec, String> {
4242    use crate::basis::{BSplineKnotPlacement, clamped_knot_vector_from_internal_positions};
4243    if let Some(positions) = parse_explicit_internal_knots(options)? {
4244        if option_usize_any_strict(options, &["k", "basis_dim", "basis-dim", "basisdim"])?.is_some()
4245        {
4246            return Err(TermBuilderError::incompatible_config(
4247                "ps/bspline smooth: specify either explicit knots=[...] positions or \
4248                 k=<basis_dim> (not both); the basis size is fixed by the knot vector",
4249            )
4250            .to_string());
4251        }
4252        let knots = clamped_knot_vector_from_internal_positions(data_range, &positions, degree)
4253            .map_err(|e| e.to_string())?;
4254        return Ok(BSplineKnotSpec::Provided(knots));
4255    }
4256    match parse_knot_placement(options)? {
4257        BSplineKnotPlacement::Uniform => Ok(BSplineKnotSpec::Generate {
4258            data_range,
4259            num_internal_knots: n_knots,
4260        }),
4261        BSplineKnotPlacement::Quantile => {
4262            // Validate the column up-front so an unfittable request surfaces a
4263            // user-correctable error at parse time rather than deep in basis
4264            // construction. The same data drives the eventual quantile knots.
4265            crate::basis::auto_knot_vector_1d_quantile(data, n_knots, degree)
4266                .map_err(|e| e.to_string())?;
4267            Ok(BSplineKnotSpec::Automatic {
4268                num_internal_knots: Some(n_knots),
4269                placement: BSplineKnotPlacement::Quantile,
4270            })
4271        }
4272    }
4273}
4274
4275/// Reject unknown option keys with a focused error that names the term and
4276/// the offending key, plus suggests near-matches from the known-key list.
4277/// Without this, typos like `lengt_scale=0.1` or `nyu=5/2` are silently
4278/// dropped, the term uses the default, and the user has no idea why their
4279/// option had no effect.
4280pub fn validate_known_options(
4281    term_name: &str,
4282    options: &BTreeMap<String, String>,
4283    known: &[&str],
4284) -> Result<(), String> {
4285    let known_set: std::collections::BTreeSet<&&str> = known.iter().collect();
4286    for key in options.keys() {
4287        if !known_set.contains(&key.as_str()) {
4288            if term_name == "tensor" && is_tensor_k_axis_option_key(key) {
4289                continue;
4290            }
4291            // Suggest near-matches (substring or shared prefix ≥ 3).
4292            let key_l = key.to_ascii_lowercase();
4293            let mut suggestions: Vec<&str> = known
4294                .iter()
4295                .filter(|k| {
4296                    let kl = k.to_ascii_lowercase();
4297                    kl.contains(&key_l) || key_l.contains(&kl) || {
4298                        let n = kl
4299                            .chars()
4300                            .zip(key_l.chars())
4301                            .take_while(|(a, b)| a == b)
4302                            .count();
4303                        n >= 3
4304                    }
4305                })
4306                .copied()
4307                .collect();
4308            suggestions.sort_unstable();
4309            suggestions.dedup();
4310            let hint = if suggestions.is_empty() {
4311                String::new()
4312            } else {
4313                format!(" — did you mean one of [{}]?", suggestions.join(", "))
4314            };
4315            return Err(TermBuilderError::invalid_option(format!(
4316                "{term_name}() does not accept option `{key}`{hint}. Valid options: [{}]",
4317                {
4318                    let mut sorted = known.to_vec();
4319                    sorted.sort_unstable();
4320                    sorted.join(", ")
4321                }
4322            ))
4323            .to_string());
4324        }
4325    }
4326    Ok(())
4327}
4328
/// Private (engine-injected) option key that caps the *default* spatial center
/// count for a secondary (distributional) predictor's smooth — see
/// `solver::fit_orchestration::apply_secondary_predictor_basis_parsimony` and #501.
///
/// It is deliberately NOT one of the user-facing count aliases recognised by
/// [`has_explicit_countwith_basis_alias`], so it never flips the spatial basis
/// onto the explicit (hard) center-placement strategy: the cap lowers the
/// *default* count while the `Auto` strategy is retained, so the count is still
/// softly reduced when the data can't support it.
///
/// The value is read by `cap_default_spatial_centers`.
pub const SECONDARY_CENTER_CAP_OPTION: &str = "__secondary_center_cap";
4339
4340/// Apply the secondary-predictor center cap to a *default* spatial center
4341/// count. A no-op when the cap option is absent (the common case) or when the
4342/// user supplied an explicit count (then `default_count` is ignored downstream
4343/// by [`parse_countwith_basis_alias`] anyway).
4344pub(crate) fn cap_default_spatial_centers(
4345    options: &BTreeMap<String, String>,
4346    default_count: usize,
4347) -> usize {
4348    match option_usize(options, SECONDARY_CENTER_CAP_OPTION) {
4349        Some(cap) => default_count.min(cap),
4350        None => default_count,
4351    }
4352}
4353
/// Default Matérn center count: `planned_count`, raised to a small-`n` floor of
/// `min(d + 4, n)` and never below 1.
fn default_matern_center_count(n: usize, d: usize, planned_count: usize) -> usize {
    // #1074: the mgcv-sized basis cap (`k = 10·3^(d-1)`) was deleted here as
    // well — it hid the over-sizing/under-penalization defect by shrinking the
    // basis rather than fixing the optimizer, so the default now follows the
    // generic n-scaling plan. The floor stays: it is a legitimate degenerate
    // guard against a numerically fragile two-column kernel block. Explicit
    // `k`/`centers` still take full effect upstream.
    let small_n_floor = std::cmp::min(d + 4, n);
    std::cmp::max(1, std::cmp::max(planned_count, small_n_floor))
}
4364
4365pub fn parse_countwith_basis_alias(
4366    options: &BTreeMap<String, String>,
4367    primarykey: &str,
4368    default_count: usize,
4369) -> Result<usize, String> {
4370    // Strict: reject unparseable values (e.g. `centers=many`, `centers=-1`,
4371    // `centers=1.5`) instead of silently dropping them and falling through
4372    // to the default. Without this the user gets the auto-inferred count
4373    // silently and never realizes their explicit option was ignored.
4374    let primary = option_usize_strict(options, primarykey)?;
4375    let basis_dim = option_usize_any_strict(
4376        options,
4377        &["k", "basis_dim", "basis-dim", "basisdim", "knots"],
4378    )?;
4379    if primary.is_some() && basis_dim.is_some() {
4380        return Err(TermBuilderError::incompatible_config(format!(
4381            "specify either {}=<count> or k=<basis_dim> (not both)",
4382            primarykey
4383        ))
4384        .to_string());
4385    }
4386    Ok(primary.or(basis_dim).unwrap_or(default_count))
4387}
4388
/// Report whether the user spelled out a count, either under `primarykey` or
/// under any basis-dimension alias (`k`, `basis_dim`, `basis-dim`, `basisdim`,
/// `knots`).
///
/// Only key presence is tested; the associated values are not inspected.
pub fn has_explicit_countwith_basis_alias(
    options: &BTreeMap<String, String>,
    primarykey: &str,
) -> bool {
    options.keys().any(|key| {
        let key = key.as_str();
        key == primarykey || matches!(key, "k" | "basis_dim" | "basis-dim" | "basisdim" | "knots")
    })
}
4398
4399pub fn parse_cyclic_boundary(
4400    options: &BTreeMap<String, String>,
4401    minv: f64,
4402    maxv: f64,
4403) -> Result<OneDimensionalBoundary, String> {
4404    let cyclic = option_bool(options, "cyclic")
4405        .or_else(|| option_bool(options, "periodic"))
4406        .unwrap_or(false);
4407    if !cyclic {
4408        return Ok(OneDimensionalBoundary::Open);
4409    }
4410    let start = match option_numeric_expr(options, "period_start")? {
4411        Some(v) => v,
4412        None => option_numeric_expr(options, "start")?.unwrap_or(minv),
4413    };
4414    let end = match option_numeric_expr(options, "period_end")? {
4415        Some(v) => v,
4416        None => option_numeric_expr(options, "end")?.unwrap_or(maxv),
4417    };
4418    if end <= start {
4419        return Err(format!(
4420            "cyclic smooth requires period_end/end ({end}) > period_start/start ({start})"
4421        ));
4422    }
4423    Ok(OneDimensionalBoundary::Cyclic { start, end })
4424}
4425
4426/// Parse the periodic-uniform domain for a one-dimensional cyclic smooth.
4427///
4428/// Returns the `(domain_start, period)` pair derived from
4429/// `period_start` / `start`, `period_end` / `end`, falling back to the
4430/// data range `[minv, maxv)` when neither bound is provided. The period
4431/// must be strictly positive.
4432pub fn parse_periodic_domain_1d(
4433    options: &BTreeMap<String, String>,
4434    minv: f64,
4435    maxv: f64,
4436) -> Result<(f64, f64), String> {
4437    let start_opt = match option_numeric_expr(options, "period_start")? {
4438        Some(v) => Some(v),
4439        None => option_numeric_expr(options, "start")?,
4440    };
4441    let end_opt = match option_numeric_expr(options, "period_end")? {
4442        Some(v) => Some(v),
4443        None => option_numeric_expr(options, "end")?,
4444    };
4445    // Reject the pure data-range fallback. A B-spline periodic smooth that takes
4446    // its wrap from the observed [min, max] is sample-dependent and silently
4447    // wrong: uniform draws on a true period of 2π land on [ε, 2π−ε], so using
4448    // (max−min) as the period seams the curve with an off-by-ε discontinuity and
4449    // the fit drifts with the sample. (Unlike the radial closed-lattice Duchon
4450    // path, whose centers DO tile a full period, so its span-derive is exact —
4451    // see `parse_periodic_axes_option`.) Require the caller to name the period
4452    // explicitly via `period=`/`period_end`. The end is only defaulted to `maxv`
4453    // when a `period_start`/`start` was given (a half-open declaration); a bare
4454    // periodic smooth with neither bound is an error.
4455    if end_opt.is_none() && start_opt.is_none() {
4456        return Err(
4457            "periodic B-spline smooth requires an explicit period: pass period=<value> \
4458             (e.g. period=2*pi) or period_start=/period_end=. Deriving the period from the \
4459             observed data range is sample-dependent and produces an off-by-ε seam, so it is \
4460             not inferred."
4461                .to_string(),
4462        );
4463    }
4464    let start = start_opt.unwrap_or(minv);
4465    let end = end_opt.unwrap_or(maxv);
4466    if !(start.is_finite() && end.is_finite()) {
4467        return Err(format!(
4468            "periodic smooth domain requires finite endpoints, got ({start}, {end})"
4469        ));
4470    }
4471    if end <= start {
4472        return Err(format!(
4473            "periodic smooth requires period_end/end ({end}) > period_start/start ({start})"
4474        ));
4475    }
4476    Ok((start, end - start))
4477}
4478
4479fn parse_matern_nu(raw: &str) -> Result<MaternNu, String> {
4480    let trimmed = raw.trim();
4481    let lowered = trimmed.to_ascii_lowercase();
4482    match lowered.as_str() {
4483        "1/2" | "0.5" | "half" => return Ok(MaternNu::Half),
4484        "3/2" | "1.5" => return Ok(MaternNu::ThreeHalves),
4485        "5/2" | "2.5" => return Ok(MaternNu::FiveHalves),
4486        "7/2" | "3.5" => return Ok(MaternNu::SevenHalves),
4487        "9/2" | "4.5" => return Ok(MaternNu::NineHalves),
4488        _ => {}
4489    }
4490
4491    let value = if let Some((num, den)) = trimmed.split_once('/') {
4492        let num = num
4493            .trim()
4494            .parse::<f64>()
4495            .map_err(|err| format!("{}: {err}", unsupported_matern_nu_message(raw)))?;
4496        let den = den
4497            .trim()
4498            .parse::<f64>()
4499            .map_err(|err| format!("{}: {err}", unsupported_matern_nu_message(raw)))?;
4500        if den == 0.0 || !num.is_finite() || !den.is_finite() {
4501            return Err(unsupported_matern_nu_message(raw));
4502        }
4503        num / den
4504    } else {
4505        trimmed
4506            .parse::<f64>()
4507            .map_err(|err| format!("{}: {err}", unsupported_matern_nu_message(raw)))?
4508    };
4509
4510    const TOL: f64 = 1e-12;
4511    if (value - 0.5).abs() <= TOL {
4512        Ok(MaternNu::Half)
4513    } else if (value - 1.5).abs() <= TOL {
4514        Ok(MaternNu::ThreeHalves)
4515    } else if (value - 2.5).abs() <= TOL {
4516        Ok(MaternNu::FiveHalves)
4517    } else if (value - 3.5).abs() <= TOL {
4518        Ok(MaternNu::SevenHalves)
4519    } else if (value - 4.5).abs() <= TOL {
4520        Ok(MaternNu::NineHalves)
4521    } else {
4522        Err(unsupported_matern_nu_message(raw))
4523    }
4524}
4525
4526fn unsupported_matern_nu_message(raw: &str) -> String {
4527    TermBuilderError::unsupported_feature(format!(
4528        "unsupported Matern nu '{raw}'; supported half-integer values are 1/2, 3/2, 5/2, 7/2, and 9/2"
4529    ))
4530    .to_string()
4531}
4532
4533#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
4534pub enum DuchonPowerPolicy {
4535    Explicit(f64),
4536    /// No explicit `power=` given: defer to the cubic structural default, which
4537    /// the builder resolves dimension-aware as `s = (d − 1)/2` (so `φ(r) = r³`
4538    /// in every dimension). There is no triple-operator minimum any more.
4539    CubicStructuralDefault,
4540}
4541
4542pub fn parse_duchon_power_policy(
4543    options: &BTreeMap<String, String>,
4544) -> Result<DuchonPowerPolicy, String> {
4545    if let Some(raw_nu) = options.get("nu") {
4546        return Err(TermBuilderError::incompatible_config(format!(
4547            "Duchon smooths use power=<number>, not nu='{}'. Use power=1.5, power=2, etc.",
4548            raw_nu
4549        ))
4550        .to_string());
4551    }
4552    match options.get("power") {
4553        Some(raw) => {
4554            let value = raw.parse::<f64>().map_err(|err| {
4555                TermBuilderError::invalid_option(format!(
4556                    "invalid Duchon power '{}'; expected a non-negative number such as power=1.5 or power=2: {}",
4557                    raw, err
4558                ))
4559                .to_string()
4560            })?;
4561            if !value.is_finite() || value < 0.0 {
4562                return Err(TermBuilderError::invalid_option(format!(
4563                    "invalid Duchon power '{}'; expected a finite non-negative number such as power=1.5 or power=2",
4564                    raw
4565                ))
4566                .to_string());
4567            }
4568            Ok(DuchonPowerPolicy::Explicit(value))
4569        }
4570        None => Ok(DuchonPowerPolicy::CubicStructuralDefault),
4571    }
4572}
4573
4574pub fn parse_duchon_power(options: &BTreeMap<String, String>) -> Result<f64, String> {
4575    match parse_duchon_power_policy(options)? {
4576        DuchonPowerPolicy::Explicit(power) => Ok(power),
4577        // Context-free placeholder: the bare option parser has no column count,
4578        // so it cannot compute the dimension-aware cubic power `s = (d − 1)/2`.
4579        // The dimension-aware resolution happens later in `build_smooth_basis`;
4580        // this 1.5 is only a stand-in for callers that need a concrete number
4581        // without data context (e.g. round-trip parser tests).
4582        DuchonPowerPolicy::CubicStructuralDefault => Ok(1.5),
4583    }
4584}
4585
4586pub fn parse_duchon_order(
4587    options: &BTreeMap<String, String>,
4588) -> Result<DuchonNullspaceOrder, String> {
4589    match options.get("order") {
4590        // Structural cubic Duchon is affine-by-default: an unspecified order is
4591        // the `Linear` (constant + linear) null space, matching the magic
4592        // default. An explicit `order=0` still selects the constant-only space.
4593        None => Ok(DuchonNullspaceOrder::Linear),
4594        Some(raw) => match raw.parse::<usize>() {
4595            Ok(0) => Ok(DuchonNullspaceOrder::Zero),
4596            Ok(1) => Ok(DuchonNullspaceOrder::Linear),
4597            Ok(other) => Ok(DuchonNullspaceOrder::Degree(other)),
4598            Err(_) => Err(TermBuilderError::invalid_option(format!(
4599                "invalid Duchon order '{}'; expected a non-negative integer such as order=0, order=1, or order=2",
4600                raw
4601            ))
4602            .to_string()),
4603        },
4604    }
4605}
4606
4607fn parse_matern_identifiability(
4608    options: &BTreeMap<String, String>,
4609) -> Result<MaternIdentifiability, TermBuilderError> {
4610    let Some(raw) = options.get("identifiability").map(String::as_str) else {
4611        return Ok(MaternIdentifiability::default());
4612    };
4613    match raw.trim().to_ascii_lowercase().as_str() {
4614        "none" => Ok(MaternIdentifiability::None),
4615        "sum_tozero" | "sum-to-zero" | "center_sum_tozero" | "center-sum-to-zero" | "centered" => {
4616            Ok(MaternIdentifiability::CenterSumToZero)
4617        }
4618        "linear" | "center_linear_orthogonal" | "center-linear-orthogonal" => {
4619            Ok(MaternIdentifiability::CenterLinearOrthogonal)
4620        }
4621        other => Err(TermBuilderError::unsupported_feature(format!(
4622            "invalid Matérn identifiability '{other}'; expected one of: none, sum_tozero, linear"
4623        ))),
4624    }
4625}
4626
4627fn parse_spatial_identifiability(
4628    options: &BTreeMap<String, String>,
4629) -> Result<SpatialIdentifiability, TermBuilderError> {
4630    let Some(raw) = options.get("identifiability").map(String::as_str) else {
4631        return Ok(SpatialIdentifiability::default());
4632    };
4633    match raw.trim().to_ascii_lowercase().as_str() {
4634        "none" => Ok(SpatialIdentifiability::None),
4635        "orthogonal"
4636        | "orthogonal_to_parametric"
4637        | "orthogonal-to-parametric"
4638        | "parametric_orthogonal" => Ok(SpatialIdentifiability::OrthogonalToParametric),
4639        "frozen" => Err(TermBuilderError::unsupported_feature(
4640            "spatial identifiability 'frozen' is internal-only; use none or orthogonal_to_parametric",
4641        )),
4642        other => Err(TermBuilderError::unsupported_feature(format!(
4643            "invalid spatial identifiability '{other}'; expected one of: none, orthogonal_to_parametric"
4644        ))),
4645    }
4646}
4647
4648#[cfg(test)]
4649mod tests {
4650    use super::*;
4651    use crate::inference::formula_dsl::parse_formula;
4652    use gam_data::{DataSchema, SchemaColumn};
4653    use ndarray::Array2;
4654    use std::collections::BTreeMap;
4655
4656    fn continuous_dataset(headers: &[&str], rows: Vec<Vec<f64>>) -> Dataset {
4657        let nrows = rows.len();
4658        let ncols = headers.len();
4659        let values = Array2::from_shape_vec(
4660            (nrows, ncols),
4661            rows.into_iter().flat_map(|row| row.into_iter()).collect(),
4662        )
4663        .expect("rectangular test data");
4664        Dataset {
4665            headers: headers.iter().map(|name| name.to_string()).collect(),
4666            values,
4667            schema: DataSchema {
4668                columns: headers
4669                    .iter()
4670                    .map(|name| SchemaColumn {
4671                        name: name.to_string(),
4672                        kind: ColumnKindTag::Continuous,
4673                        levels: vec![],
4674                    })
4675                    .collect(),
4676            },
4677            column_kinds: vec![ColumnKindTag::Continuous; ncols],
4678        }
4679    }
4680
4681    fn factor_dataset() -> Dataset {
4682        let rows = (0..24)
4683            .map(|i| {
4684                let x = i as f64 / 23.0;
4685                let g = (i % 2) as f64;
4686                vec![x + g, x, g]
4687            })
4688            .collect::<Vec<_>>();
4689        Dataset {
4690            headers: vec!["y".into(), "x".into(), "g".into()],
4691            values: Array2::from_shape_vec(
4692                (rows.len(), 3),
4693                rows.into_iter().flat_map(|row| row.into_iter()).collect(),
4694            )
4695            .expect("rectangular factor test data"),
4696            schema: DataSchema {
4697                columns: vec![
4698                    SchemaColumn {
4699                        name: "y".into(),
4700                        kind: ColumnKindTag::Continuous,
4701                        levels: vec![],
4702                    },
4703                    SchemaColumn {
4704                        name: "x".into(),
4705                        kind: ColumnKindTag::Continuous,
4706                        levels: vec![],
4707                    },
4708                    SchemaColumn {
4709                        name: "g".into(),
4710                        kind: ColumnKindTag::Categorical,
4711                        levels: vec!["a".into(), "b".into()],
4712                    },
4713                ],
4714            },
4715            column_kinds: vec![
4716                ColumnKindTag::Continuous,
4717                ColumnKindTag::Continuous,
4718                ColumnKindTag::Categorical,
4719            ],
4720        }
4721    }
4722
4723    /// #1378: the DEFAULT univariate `s(x, bs="tp")` must build a *modest*
4724    /// mgcv-sized basis, not the n-scaled spatial heuristic. The oversized
4725    /// default basis left the two-penalty REML ρ-surface with a flat valley
4726    /// whose optimizer landing point depended on row order, breaking
4727    /// row-permutation invariance. Pin the default 1-D center count so a
4728    /// regression that reinstates the n-scaled default trips here, fast, with
4729    /// no fit/optimizer in the loop.
4730    #[test]
4731    fn default_univariate_thinplate_basis_dim_is_modest() {
4732        // n = 300 (the #1378 scenario): the n-scaled spatial heuristic would
4733        // request ~75 centers here. The modest default must stay near k = 10.
4734        let n = 300usize;
4735        let rows: Vec<Vec<f64>> = (0..n)
4736            .map(|i| {
4737                let x = -3.0 + 6.0 * (i as f64) / ((n - 1) as f64);
4738                vec![x.sin(), x]
4739            })
4740            .collect();
4741        let ds = continuous_dataset(&["y", "x"], rows);
4742
4743        let mut options = BTreeMap::new();
4744        options.insert("bs".to_string(), "tp".to_string());
4745
4746        let mut notes = Vec::new();
4747        let basis = build_smooth_basis(
4748            SmoothKind::S,
4749            &["x".to_string()],
4750            &[1],
4751            &options,
4752            &ds,
4753            &mut notes,
4754            &ResourcePolicy::default_library(),
4755            1,
4756        )
4757        .expect("build default univariate tp smooth");
4758
4759        let centers = match &basis {
4760            SmoothBasisSpec::ThinPlate { spec, .. } => match &spec.center_strategy {
4761                CenterStrategy::Auto(inner) => match inner.as_ref() {
4762                    CenterStrategy::FarthestPoint { num_centers }
4763                    | CenterStrategy::EqualMass { num_centers }
4764                    | CenterStrategy::EqualMassCovarRepresentative { num_centers }
4765                    | CenterStrategy::KMeans { num_centers, .. } => *num_centers,
4766                    other => panic!("unexpected auto inner center strategy: {other:?}"),
4767                },
4768                CenterStrategy::FarthestPoint { num_centers }
4769                | CenterStrategy::EqualMass { num_centers }
4770                | CenterStrategy::EqualMassCovarRepresentative { num_centers }
4771                | CenterStrategy::KMeans { num_centers, .. } => *num_centers,
4772                other => panic!("unexpected center strategy: {other:?}"),
4773            },
4774            other => panic!("expected ThinPlate basis, got {other:?}"),
4775        };
4776
4777        // #1074: the mgcv-sized basis-dim ceiling assertion was removed with the
4778        // cap it tested. The default tp basis is now n-scaled; we only assert it
4779        // still builds a usable basis.
4780        assert!(
4781            centers >= 1,
4782            "default univariate tp must still build a usable basis (centers={centers})",
4783        );
4784    }
4785
4786    /// gam#1629: a default 2-D `matern(x1, x2)` (no explicit `length_scale`)
4787    /// must leave the length-scale at the `0.0` auto sentinel — NOT the full
4788    /// data diameter — so the planner's `auto_init_length_scale_in_place` seeds
4789    /// it on the wiggly/resolving side (`max_range / sqrt(n)`), the same regime
4790    /// thin-plate uses. The previous `default_matern_length_scale` returned the
4791    /// full diameter, which is non-zero, so the `0.0`-gated auto-init was a
4792    /// no-op and the κ-optimizer started in the over-smoothed corner and parked
4793    /// there (truth-RMSE ~6× worse than thin-plate/tensor on identical
4794    /// high-frequency 2-D surfaces, insensitive to `k`). This pins the corrected
4795    /// seed geometry without a fit/optimizer in the loop.
4796    #[test]
4797    fn default_matern_2d_seeds_resolving_length_scale_not_overscaled_diameter() {
4798        // A fine multi-frequency 2-D grid (the #1629 reproduction shape): the
4799        // data diameter is O(1.4) in each axis; the resolving seed must be far
4800        // smaller than the diameter so high-frequency structure stays reachable.
4801        let side = 24usize; // n = 576
4802        let mut rows: Vec<Vec<f64>> = Vec::with_capacity(side * side);
4803        for i in 0..side {
4804            for j in 0..side {
4805                let x1 = i as f64 / (side - 1) as f64; // [0, 1]
4806                let x2 = j as f64 / (side - 1) as f64; // [0, 1]
4807                let y = (6.0 * x1).sin() * (6.0 * x2).cos();
4808                rows.push(vec![y, x1, x2]);
4809            }
4810        }
4811        let n = rows.len();
4812        let ds = continuous_dataset(&["y", "x1", "x2"], rows);
4813
4814        let mut options = BTreeMap::new();
4815        options.insert("bs".to_string(), "gp".to_string()); // gp ⇒ Matérn
4816        let mut notes = Vec::new();
4817        let mut basis = build_smooth_basis(
4818            SmoothKind::S,
4819            &["x1".to_string(), "x2".to_string()],
4820            &[1, 2],
4821            &options,
4822            &ds,
4823            &mut notes,
4824            &ResourcePolicy::default_library(),
4825            1,
4826        )
4827        .expect("build default 2-D matern smooth");
4828
4829        // (1) The builder must emit the auto sentinel, not a baked-in diameter.
4830        let (feature_cols, seeded_length_scale) = match &basis {
4831            SmoothBasisSpec::Matern {
4832                feature_cols, spec, ..
4833            } => (feature_cols.clone(), spec.length_scale),
4834            other => panic!("expected Matern basis, got {other:?}"),
4835        };
4836        assert_eq!(
4837            seeded_length_scale, 0.0,
4838            "default matern() must leave length_scale at the 0.0 auto sentinel \
4839             (got {seeded_length_scale}); a non-zero diameter default re-enters the \
4840             over-smoothed basin and disables the planner's wiggly-side auto-init",
4841        );
4842
4843        // (2) After the shared auto-init runs, the realized length-scale must
4844        // land in the resolving regime: `max_range / sqrt(n)`, far below the
4845        // data diameter. This is the seed the κ-optimizer starts REML from.
4846        crate::smooth::auto_init_length_scale_in_basis(ds.values.view(), &mut basis);
4847        let realized = match &basis {
4848            SmoothBasisSpec::Matern { spec, .. } => spec.length_scale,
4849            other => panic!("expected Matern basis after auto-init, got {other:?}"),
4850        };
4851        let expected =
4852            crate::smooth::auto_initial_length_scale(ds.values.view(), &feature_cols);
4853        assert!(
4854            (realized - expected).abs() <= 1e-12,
4855            "auto-init must seed the wiggly-side length scale max_range/sqrt(n) \
4856             (expected {expected}, got {realized})",
4857        );
4858
4859        // Sanity: the resolving seed is well below the per-axis range (≈1.0).
4860        // Before the fix the seed was the full diameter (≈√2 ≈ 1.414); the
4861        // resolving seed here is ≈ 1.0 / sqrt(576) ≈ 0.042, ~30× smaller.
4862        let max_range = 1.0_f64; // each axis spans [0, 1]
4863        assert!(
4864            realized < max_range / 4.0,
4865            "matern seed length_scale {realized} must be in the resolving regime, \
4866             not the over-smoothed diameter corner (n={n}, max_range≈{max_range})",
4867        );
4868    }
4869
4870    /// gam#1778: `matern(..., periodic=true)` and `thinplate(..., periodic=true)`
4871    /// must be ACCEPTED. The squash-merge that wired periodic support into the
4872    /// matern/thinplate basis specs forgot to add the periodic option keys to
4873    /// those two builders' `validate_known_options` whitelists (only `duchon`
4874    /// got both), so `periodic=`/`period=`/`cyclic=`/`period_start=`/`period_end=`
4875    /// were rejected as unknown options even though the spec/builder consume them.
4876    /// Before the whitelist fix this returned an "unknown option" error.
4877    #[test]
4878    fn matern_and_thinplate_accept_periodic_option() {
4879        let n = 200usize;
4880        let rows: Vec<Vec<f64>> = (0..n)
4881            .map(|i| {
4882                let x = -3.0 + 6.0 * (i as f64) / ((n - 1) as f64);
4883                vec![x.sin(), x]
4884            })
4885            .collect();
4886        let ds = continuous_dataset(&["y", "x"], rows);
4887
4888        // matern() with periodic=true must build without an unknown-option error.
4889        let mut matern_opts = BTreeMap::new();
4890        matern_opts.insert("bs".to_string(), "gp".to_string()); // gp ⇒ Matérn
4891        matern_opts.insert("periodic".to_string(), "true".to_string());
4892        let mut notes = Vec::new();
4893        let matern_basis = build_smooth_basis(
4894            SmoothKind::S,
4895            &["x".to_string()],
4896            &[1],
4897            &matern_opts,
4898            &ds,
4899            &mut notes,
4900            &ResourcePolicy::default_library(),
4901            1,
4902        )
4903        .expect("matern(x, periodic=true) must be accepted");
4904        match &matern_basis {
4905            SmoothBasisSpec::Matern { spec, .. } => assert!(
4906                spec.periodic.is_some(),
4907                "periodic=true must thread a Some(periodic) into the matern spec",
4908            ),
4909            other => panic!("expected Matern basis, got {other:?}"),
4910        }
4911
4912        // thinplate()/tps() with periodic=true must likewise be accepted.
4913        let mut tps_opts = BTreeMap::new();
4914        tps_opts.insert("bs".to_string(), "tp".to_string());
4915        tps_opts.insert("periodic".to_string(), "true".to_string());
4916        let mut notes = Vec::new();
4917        let tps_basis = build_smooth_basis(
4918            SmoothKind::S,
4919            &["x".to_string()],
4920            &[1],
4921            &tps_opts,
4922            &ds,
4923            &mut notes,
4924            &ResourcePolicy::default_library(),
4925            1,
4926        )
4927        .expect("thinplate(x, periodic=true) must be accepted");
4928        match &tps_basis {
4929            SmoothBasisSpec::ThinPlate { spec, .. } => assert!(
4930                spec.periodic.is_some(),
4931                "periodic=true must thread a Some(periodic) into the thinplate spec",
4932            ),
4933            other => panic!("expected ThinPlate basis, got {other:?}"),
4934        }
4935    }
4936
4937    /// Regression: an explicit scalar `periodic=false` on a radial spatial smooth
4938    /// must build a NON-periodic basis. The scalar-boolean shortcut used to emit
4939    /// `Some(vec![None; dim])`, which the 1-D radial builders route on via
4940    /// `spec.periodic.is_some()` (and the Duchon arm even back-fills the data
4941    /// range into a lone `None`), so `periodic=false` silently produced a
4942    /// *periodic* smooth — the opposite of what was asked. The spec's `periodic`
4943    /// field must be `None` for every radial base (matern / thinplate / duchon),
4944    /// matching the bracketed `[false]` form.
4945    #[test]
4946    fn scalar_periodic_false_builds_non_periodic_radial_smooth() {
4947        let n = 200usize;
4948        let rows: Vec<Vec<f64>> = (0..n)
4949            .map(|i| {
4950                let x = -3.0 + 6.0 * (i as f64) / ((n - 1) as f64);
4951                vec![x.sin(), x]
4952            })
4953            .collect();
4954        let ds = continuous_dataset(&["y", "x"], rows);
4955
4956        let build = |bs: &str| -> SmoothBasisSpec {
4957            let mut opts = BTreeMap::new();
4958            opts.insert("bs".to_string(), bs.to_string());
4959            opts.insert("periodic".to_string(), "false".to_string());
4960            let mut notes = Vec::new();
4961            build_smooth_basis(
4962                SmoothKind::S,
4963                &["x".to_string()],
4964                &[1],
4965                &opts,
4966                &ds,
4967                &mut notes,
4968                &ResourcePolicy::default_library(),
4969                1,
4970            )
4971            .unwrap_or_else(|e| panic!("s(x, bs={bs}, periodic=false) must be accepted: {e}"))
4972        };
4973
4974        match &build("gp") {
4975            SmoothBasisSpec::Matern { spec, .. } => assert!(
4976                spec.periodic.is_none(),
4977                "periodic=false must leave the matern spec non-periodic, got {:?}",
4978                spec.periodic
4979            ),
4980            other => panic!("expected Matern basis, got {other:?}"),
4981        }
4982        match &build("tp") {
4983            SmoothBasisSpec::ThinPlate { spec, .. } => assert!(
4984                spec.periodic.is_none(),
4985                "periodic=false must leave the thinplate spec non-periodic, got {:?}",
4986                spec.periodic
4987            ),
4988            other => panic!("expected ThinPlate basis, got {other:?}"),
4989        }
4990        match &build("duchon") {
4991            SmoothBasisSpec::Duchon { spec, .. } => assert!(
4992                spec.periodic.is_none(),
4993                "periodic=false must leave the duchon spec non-periodic (no data-range \
4994                 back-fill), got {:?}",
4995                spec.periodic
4996            ),
4997            other => panic!("expected Duchon basis, got {other:?}"),
4998        }
4999    }
5000
5001    fn inferred_tensor_basis_product(ds: &Dataset) -> usize {
5002        let parsed = parse_formula("y ~ te(theta, h)").expect("parse tensor formula");
5003        let col_map = ds.column_map();
5004        let mut notes = Vec::new();
5005        let terms = build_termspec(
5006            &parsed.terms,
5007            ds,
5008            &col_map,
5009            &mut notes,
5010            &ResourcePolicy::default_library(),
5011        )
5012        .expect("build tensor termspec");
5013        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
5014            panic!("expected tensor smooth");
5015        };
5016        spec.marginalspecs
5017            .iter()
5018            .map(|marginal| match marginal.knotspec {
5019                BSplineKnotSpec::Generate {
5020                    num_internal_knots, ..
5021                } => num_internal_knots + marginal.degree + 1,
5022                BSplineKnotSpec::PeriodicUniform { num_basis, .. } => num_basis,
5023                BSplineKnotSpec::Automatic {
5024                    num_internal_knots: Some(num_internal_knots),
5025                    ..
5026                } => num_internal_knots + marginal.degree + 1,
5027                BSplineKnotSpec::Automatic {
5028                    num_internal_knots: None,
5029                    ..
5030                } => panic!("test helper cannot infer automatic knot count"),
5031                BSplineKnotSpec::Provided(ref knots) => {
5032                    knots.len().saturating_sub(marginal.degree + 1)
5033                }
5034                // cr basis dimension equals the knot count (no degree offset).
5035                BSplineKnotSpec::NaturalCubicRegression { ref knots } => knots.len(),
5036            })
5037            .product()
5038    }
5039
5040    fn tensor_margin_basis_sizes(ds: &Dataset, formula: &str) -> Vec<usize> {
5041        let parsed = parse_formula(formula).expect("parse tensor formula");
5042        let col_map = ds.column_map();
5043        let mut notes = Vec::new();
5044        let terms = build_termspec(
5045            &parsed.terms,
5046            ds,
5047            &col_map,
5048            &mut notes,
5049            &ResourcePolicy::default_library(),
5050        )
5051        .expect("build tensor termspec");
5052        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
5053            panic!("expected tensor smooth");
5054        };
5055        spec.marginalspecs
5056            .iter()
5057            .map(|marginal| match marginal.knotspec {
5058                BSplineKnotSpec::Generate {
5059                    num_internal_knots, ..
5060                } => num_internal_knots + marginal.degree + 1,
5061                BSplineKnotSpec::PeriodicUniform { num_basis, .. } => num_basis,
5062                BSplineKnotSpec::Automatic {
5063                    num_internal_knots: Some(num_internal_knots),
5064                    ..
5065                } => num_internal_knots + marginal.degree + 1,
5066                BSplineKnotSpec::Automatic {
5067                    num_internal_knots: None,
5068                    ..
5069                } => panic!("test helper cannot infer automatic knot count"),
5070                BSplineKnotSpec::Provided(ref knots) => {
5071                    knots.len().saturating_sub(marginal.degree + 1)
5072                }
5073                // cr basis dimension equals the knot count (no degree offset).
5074                BSplineKnotSpec::NaturalCubicRegression { ref knots } => knots.len(),
5075            })
5076            .collect()
5077    }
5078
5079    #[test]
5080    fn validate_known_options_lists_valid_option_names_for_unknown_parameter() {
5081        let mut options = BTreeMap::new();
5082        options.insert("lengt_scale".to_string(), "0.25".to_string());
5083        let err = validate_known_options(
5084            "matern",
5085            &options,
5086            &["type", "bs", "length_scale", "centers", "k", "nu"],
5087        )
5088        .expect_err("unknown smooth option should be rejected");
5089        assert!(
5090            err.contains("matern() does not accept option `lengt_scale`"),
5091            "error should name the invalid option, got: {err}"
5092        );
5093        assert!(
5094            err.contains("did you mean one of [length_scale]"),
5095            "error should suggest the closest valid option, got: {err}"
5096        );
5097        assert!(
5098            err.contains("Valid options: ["),
5099            "error should list valid option names, got: {err}"
5100        );
5101    }
5102
5103    #[test]
5104    fn tensor_k_accepts_square_bracket_per_margin_list() {
5105        let ds = continuous_dataset(
5106            &["y", "x", "z"],
5107            (0..40)
5108                .map(|i| {
5109                    let x = i as f64 / 39.0;
5110                    let z = ((i * 7) % 40) as f64 / 39.0;
5111                    vec![x.sin() + z.cos(), x, z]
5112                })
5113                .collect(),
5114        );
5115
5116        assert_eq!(
5117            tensor_margin_basis_sizes(&ds, "y ~ te(x, z, k=[5, 6])"),
5118            vec![5, 6],
5119            "square-bracket k lists should materialize the requested per-margin values"
5120        );
5121    }
5122
5123    /// #1776 / #1752: a bare doubly-cyclic tensor `te(x, z, bs=c('cc','cc'))`
5124    /// with NO explicit `period=` must build — each cyclic margin wraps on its
5125    /// own observed `[min, max]` data span (mirroring mgcv's `bs="cc"` and the
5126    /// 1-D cyclic fallback), instead of hard-erroring "periodic but requires an
5127    /// explicit period". The periodic-radial refactor (c8c3192fa) replaced that
5128    /// fallback with an unconditional `period=`-required error and orphaned the
5129    /// `margin_is_cc` binding that drives it (the #1776 dead-binding `-D
5130    /// warnings` build break). This pins the restored data-range derivation so a
5131    /// regression that drops the `None if margin_is_cc` branch trips here, fast,
5132    /// with no fit/optimizer in the loop.
5133    #[test]
5134    fn bare_doubly_cyclic_tensor_derives_period_from_data_range_1776() {
5135        let ds = continuous_dataset(
5136            &["y", "x", "z"],
5137            (0..40)
5138                .map(|i| {
5139                    let x = i as f64 / 39.0;
5140                    let z = ((i * 7) % 40) as f64 / 39.0;
5141                    vec![x.sin() + z.cos(), x, z]
5142                })
5143                .collect(),
5144        );
5145
5146        let parsed = parse_formula("y ~ te(x, z, bs=c('cc','cc'))")
5147            .expect("parse doubly-cyclic tensor formula");
5148        let col_map = ds.column_map();
5149        let mut notes = Vec::new();
5150        // Must NOT hard-error: the bare cyclic margins derive their period from
5151        // the observed data range (the restored #1752 fallback).
5152        let terms = build_termspec(
5153            &parsed.terms,
5154            &ds,
5155            &col_map,
5156            &mut notes,
5157            &ResourcePolicy::default_library(),
5158        )
5159        .expect(
5160            "bare cc-cc tensor must build via the data-range period fallback (#1776/#1752), \
5161             not hard-error on a missing explicit period",
5162        );
5163        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
5164            panic!("expected tensor smooth");
5165        };
5166        assert_eq!(
5167            spec.marginalspecs.len(),
5168            2,
5169            "te(x, z) builds exactly two tensor margins"
5170        );
5171        for (axis, marginal) in spec.marginalspecs.iter().enumerate() {
5172            assert!(
5173                matches!(marginal.knotspec, BSplineKnotSpec::PeriodicUniform { .. }),
5174                "cyclic margin {axis} must build a periodic (wrapped) knotspec from the \
5175                 data range, got {:?}",
5176                marginal.knotspec
5177            );
5178        }
5179    }
5180
5181    #[test]
5182    fn parse_cylinder_periodic_options_match_requested_forms() {
5183        let mut opts = BTreeMap::new();
5184        opts.insert("periodic".to_string(), "[0]".to_string());
5185        opts.insert("period".to_string(), "[2*pi, None]".to_string());
5186        let axes = parse_periodic_axes(&opts, 2).expect("axes");
5187        let periods = parse_periods(&opts, &axes).expect("periods");
5188        assert_eq!(axes, vec![true, false]);
5189        assert!((periods[0].unwrap() - 2.0 * std::f64::consts::PI).abs() < 1e-12);
5190        assert_eq!(periods[1], None);
5191
5192        let mut boundary_opts = BTreeMap::new();
5193        boundary_opts.insert(
5194            "boundary".to_string(),
5195            "['periodic', 'natural']".to_string(),
5196        );
5197        boundary_opts.insert("period".to_string(), "[2*pi, None]".to_string());
5198        let boundary_axes = parse_periodic_axes(&boundary_opts, 2).expect("boundary axes");
5199        let boundary_periods =
5200            parse_periods(&boundary_opts, &boundary_axes).expect("boundary periods");
5201        assert_eq!(boundary_axes, vec![true, false]);
5202        assert!((boundary_periods[0].unwrap() - 2.0 * std::f64::consts::PI).abs() < 1e-12);
5203        assert_eq!(boundary_periods[1], None);
5204
5205        let mut unicode_opts = BTreeMap::new();
5206        unicode_opts.insert("periodic".to_string(), "[0,1]".to_string());
5207        unicode_opts.insert("period".to_string(), "[2π, τ]".to_string());
5208        let unicode_axes = parse_periodic_axes(&unicode_opts, 2).expect("unicode axes");
5209        let unicode_periods = parse_periods(&unicode_opts, &unicode_axes).expect("unicode periods");
5210        assert_eq!(unicode_axes, vec![true, true]);
5211        assert!((unicode_periods[0].unwrap() - 2.0 * std::f64::consts::PI).abs() < 1e-12);
5212        assert!((unicode_periods[1].unwrap() - std::f64::consts::TAU).abs() < 1e-12);
5213    }
5214
5215    /// The tensor boundary-token guard must ACCEPT `clamped`/`open` (the
5216    /// B-spline-clamped, non-periodic margin spelling) alongside the periodic
5217    /// selectors and the other inert non-periodic markers, and still REJECT a
5218    /// genuine endpoint constraint like `anchored`. This locks the #415 /
5219    /// cylinder fix (`te(theta, z, boundary=['periodic','clamped'])`, mgcv
5220    /// `te(bs=c("cc","ps"))`) in the fast unit lane — the end-to-end cylinder
5221    /// recovery test is R-gated (`run_r` + mgcv), so without this the guard
5222    /// regressing back to rejecting `clamped` would slip through CPU CI.
5223    #[test]
5224    fn tensor_boundary_tokens_accept_clamped_open_reject_anchored() {
5225        fn boundary(raw: &str, dim: usize) -> Result<(), String> {
5226            let mut opts = BTreeMap::new();
5227            opts.insert("boundary".to_string(), raw.to_string());
5228            validate_tensor_boundary_tokens(&opts, dim)
5229        }
5230
5231        // Mixed periodic + clamped (the cylinder) and its bare/case/quote
5232        // variants are all accepted.
5233        for raw in [
5234            "['periodic', 'clamped']",
5235            "['periodic', 'open']",
5236            "['cc', 'clamped']",
5237            "['clamped', 'natural']",
5238            "[Periodic, CLAMPED]",
5239            "c('cc', 'clamped')",  // mgcv-style c(...) vector form round-trips
5240        ] {
5241            assert!(
5242                boundary(raw, 2).is_ok(),
5243                "boundary={raw:?} must be accepted (clamped/open/inert non-periodic markers)"
5244            );
5245        }
5246
5247        // `bc=` is an accepted alias for `boundary=`.
5248        let mut bc_opts = BTreeMap::new();
5249        bc_opts.insert("bc".to_string(), "['periodic', 'clamped']".to_string());
5250        assert!(validate_tensor_boundary_tokens(&bc_opts, 2).is_ok());
5251
5252        // A genuine endpoint constraint has no ordinary-margin meaning on a
5253        // tensor and must still be surfaced as a clean unsupported-feature error
5254        // rather than silently dropped.
5255        let err = boundary("['periodic', 'anchored']", 2)
5256            .expect_err("anchored endpoint constraint must be rejected on a tensor margin");
5257        assert!(
5258            err.contains("anchored") && err.contains("not supported"),
5259            "rejection must name the offending token and be an unsupported-feature error: {err}"
5260        );
5261
5262        // Absent boundary/bc is a no-op success.
5263        assert!(validate_tensor_boundary_tokens(&BTreeMap::new(), 2).is_ok());
5264    }
5265
5266    #[test]
5267    fn parse_single_axis_periodic_zero_as_axis_not_false() {
5268        let mut opts = BTreeMap::new();
5269        opts.insert("periodic".to_string(), "[0]".to_string());
5270        opts.insert("period".to_string(), "2*pi".to_string());
5271        opts.insert("origin".to_string(), "0".to_string());
5272        let axes = parse_periodic_axes(&opts, 1).expect("axes");
5273        let periods = parse_periods(&opts, &axes).expect("periods");
5274        let origins = parse_period_origins(&opts, &axes).expect("origins");
5275        assert_eq!(axes, vec![true]);
5276        assert!((periods[0].unwrap() - 2.0 * std::f64::consts::PI).abs() < 1e-12);
5277        assert_eq!(origins[0], Some(0.0));
5278    }
5279
5280    #[test]
5281    fn one_dimensional_bspline_accepts_boundary_periodic() {
5282        let ds = continuous_dataset(
5283            &["y", "theta"],
5284            (0..16)
5285                .map(|i| {
5286                    let theta = std::f64::consts::TAU * i as f64 / 16.0;
5287                    vec![theta.sin(), theta]
5288                })
5289                .collect(),
5290        );
5291        let parsed = parse_formula("y ~ s(theta, boundary=periodic, period=2*pi, origin=0, k=8)")
5292            .expect("parse");
5293        let col_map = ds.column_map();
5294        let mut notes = Vec::new();
5295        let terms = build_termspec(
5296            &parsed.terms,
5297            &ds,
5298            &col_map,
5299            &mut notes,
5300            &gam_runtime::resource::ResourcePolicy::default_library(),
5301        )
5302        .expect("periodic boundary should build");
5303        let SmoothBasisSpec::BSpline1D { spec, .. } = &terms.smooth_terms[0].basis else {
5304            panic!("expected 1D B-spline");
5305        };
5306        assert!(matches!(
5307            &spec.knotspec,
5308            BSplineKnotSpec::PeriodicUniform {
5309                data_range,
5310                num_basis: 8
5311            } if *data_range == (0.0, std::f64::consts::TAU)
5312        ));
5313    }
5314
5315    #[test]
5316    fn univariate_smooth_accepts_mgcv_cubic_regression_aliases() {
5317        let ds = continuous_dataset(
5318            &["y", "x"],
5319            (0..32)
5320                .map(|i| {
5321                    let x = i as f64 / 31.0;
5322                    vec![x * x, x]
5323                })
5324                .collect(),
5325        );
5326        let col_map = ds.column_map();
5327
5328        for (selector, expect_double_penalty) in [("cr", false), ("cs", true)] {
5329            let formula = format!("y ~ s(x, bs='{selector}')");
5330            let parsed = parse_formula(&formula).expect("parse cr/cs smooth");
5331            let mut notes = Vec::new();
5332            let terms = build_termspec(
5333                &parsed.terms,
5334                &ds,
5335                &col_map,
5336                &mut notes,
5337                &gam_runtime::resource::ResourcePolicy::default_library(),
5338            )
5339            .unwrap_or_else(|err| panic!("bs='{selector}' must build a 1-D smooth, got: {err:?}"));
5340            let SmoothBasisSpec::BSpline1D { spec, .. } = &terms.smooth_terms[0].basis else {
5341                panic!(
5342                    "bs='{selector}' must lower to a BSpline1D; got {:?}",
5343                    terms.smooth_terms[0].basis
5344                );
5345            };
5346            assert_eq!(
5347                spec.double_penalty, expect_double_penalty,
5348                "bs='{selector}' must default double_penalty to mgcv's convention \
5349                 (cr=no-shrinkage, cs=shrinkage); got double_penalty={}",
5350                spec.double_penalty
5351            );
5352        }
5353    }
5354
5355    #[test]
5356    fn univariate_ps_small_k_degree_reduces_through_build(/* gam#1130 */) {
5357        // mgcv accepts `s(x, bs="ps", k=3)` (and the default cubic-regression
5358        // `s(x, k=3)`) by silently reducing the cubic basis to a quadratic.
5359        // The univariate ps/bspline build path used to reject this with
5360        // "k too small for degree 3"; it must now lower to a degree-2 basis
5361        // with zero internal knots (num_basis = k = 3), matching the te(...)
5362        // margin behaviour fixed in b75f55a91. Verified across the ps alias
5363        // and the default (cr) selector that both route through
5364        // parse_ps_internal_knots.
5365        let ds = continuous_dataset(
5366            &["y", "x"],
5367            (0..32)
5368                .map(|i| {
5369                    let x = i as f64 / 31.0;
5370                    vec![x * x, x]
5371                })
5372                .collect(),
5373        );
5374        let col_map = ds.column_map();
5375
5376        for formula in ["y ~ s(x, bs='ps', k=3)", "y ~ s(x, k=3)"] {
5377            let parsed = parse_formula(formula).expect("parse small-k ps/cr smooth");
5378            let mut notes = Vec::new();
5379            let terms = build_termspec(
5380                &parsed.terms,
5381                &ds,
5382                &col_map,
5383                &mut notes,
5384                &gam_runtime::resource::ResourcePolicy::default_library(),
5385            )
5386            .unwrap_or_else(|err| {
5387                panic!("`{formula}` must degree-reduce, not error; got: {err:?}")
5388            });
5389            let SmoothBasisSpec::BSpline1D { spec, .. } = &terms.smooth_terms[0].basis else {
5390                panic!(
5391                    "`{formula}` must lower to a BSpline1D; got {:?}",
5392                    terms.smooth_terms[0].basis
5393                );
5394            };
5395            assert_eq!(
5396                spec.degree, 2,
5397                "`{formula}` must drop the cubic default to a quadratic basis"
5398            );
5399            let num_internal = match &spec.knotspec {
5400                BSplineKnotSpec::Generate {
5401                    num_internal_knots, ..
5402                } => *num_internal_knots,
5403                BSplineKnotSpec::Automatic {
5404                    num_internal_knots: Some(n),
5405                    ..
5406                } => *n,
5407                other => panic!("`{formula}` unexpected knotspec: {other:?}"),
5408            };
5409            assert_eq!(
5410                num_internal, 0,
5411                "`{formula}` must have zero internal knots (num_basis = k = 3)"
5412            );
5413            // Resulting basis dimension is num_internal + degree + 1 = 3 = k.
5414            assert!(
5415                spec.penalty_order >= 1 && spec.penalty_order <= spec.degree,
5416                "`{formula}` penalty_order {} must satisfy 1 <= order <= degree={}",
5417                spec.penalty_order,
5418                spec.degree
5419            );
5420        }
5421    }
5422
5423    #[test]
5424    fn formula_shape_constraint_round_trips_and_rejects_bogus() {
5425        let ds = continuous_dataset(
5426            &["y", "x"],
5427            (0..32)
5428                .map(|i| {
5429                    let x = i as f64 / 31.0;
5430                    vec![x * x, x]
5431                })
5432                .collect(),
5433        );
5434        let col_map = ds.column_map();
5435
5436        let parsed =
5437            parse_formula("y ~ s(x, shape=monotone_increasing)").expect("parse monotone smooth");
5438        let mut notes = Vec::new();
5439        let terms = build_termspec(
5440            &parsed.terms,
5441            &ds,
5442            &col_map,
5443            &mut notes,
5444            &gam_runtime::resource::ResourcePolicy::default_library(),
5445        )
5446        .expect("monotone smooth should build");
5447        assert_eq!(
5448            terms.smooth_terms[0].shape,
5449            ShapeConstraint::MonotoneIncreasing
5450        );
5451
5452        let parsed_bad = parse_formula("y ~ s(x, shape=bogus)").expect("parse bogus shape");
5453        let mut notes_bad = Vec::new();
5454        let err = build_termspec(
5455            &parsed_bad.terms,
5456            &ds,
5457            &col_map,
5458            &mut notes_bad,
5459            &gam_runtime::resource::ResourcePolicy::default_library(),
5460        )
5461        .expect_err("bogus shape must error");
5462        assert!(
5463            format!("{err:?}").contains("unknown shape constraint"),
5464            "got: {err:?}"
5465        );
5466    }
5467
5468    #[test]
5469    fn default_sphere_smooth_uses_spherical_farthest_point_centers() {
5470        let ds = continuous_dataset(
5471            &["y", "lat", "lon"],
5472            (0..24)
5473                .map(|i| {
5474                    let t = i as f64 / 24.0;
5475                    let lat = -60.0 + 120.0 * t;
5476                    let lon = -180.0 + 360.0 * ((7 * i) % 24) as f64 / 24.0;
5477                    vec![lat.to_radians().sin(), lat, lon]
5478                })
5479                .collect(),
5480        );
5481        let parsed = parse_formula("y ~ sphere(lat, lon)").expect("parse");
5482        let col_map = ds.column_map();
5483        let mut notes = Vec::new();
5484        let terms = build_termspec(
5485            &parsed.terms,
5486            &ds,
5487            &col_map,
5488            &mut notes,
5489            &gam_runtime::resource::ResourcePolicy::default_library(),
5490        )
5491        .expect("build sphere termspec");
5492        let SmoothBasisSpec::Sphere { spec, .. } = &terms.smooth_terms[0].basis else {
5493            panic!("expected sphere term");
5494        };
5495        assert!(matches!(
5496            spec.center_strategy,
5497            CenterStrategy::FarthestPoint { .. }
5498        ));
5499    }
5500
5501    #[test]
5502    fn one_dimensional_duchon_defaults_to_scale_free_length_scale() {
5503        let ds = continuous_dataset(
5504            &["y", "x"],
5505            (0..32)
5506                .map(|i| {
5507                    let x = i as f64 / 31.0;
5508                    vec![(std::f64::consts::TAU * x).sin(), x]
5509                })
5510                .collect(),
5511        );
5512        let parsed = parse_formula("y ~ duchon(x)").expect("parse");
5513        let col_map = ds.column_map();
5514        let mut notes = Vec::new();
5515        let terms = build_termspec(
5516            &parsed.terms,
5517            &ds,
5518            &col_map,
5519            &mut notes,
5520            &gam_runtime::resource::ResourcePolicy::default_library(),
5521        )
5522        .expect("build default duchon termspec");
5523        let SmoothBasisSpec::Duchon { spec, .. } = &terms.smooth_terms[0].basis else {
5524            panic!("expected Duchon term");
5525        };
5526        assert_eq!(spec.length_scale, None);
5527    }
5528
5529    #[test]
5530    fn one_dimensional_duchon_length_scale_opts_into_hybrid_mode() {
5531        let ds = continuous_dataset(
5532            &["y", "x"],
5533            (0..32)
5534                .map(|i| {
5535                    let x = i as f64 / 31.0;
5536                    vec![(std::f64::consts::TAU * x).sin(), x]
5537                })
5538                .collect(),
5539        );
5540        let parsed = parse_formula("y ~ duchon(x, length_scale=0.25)").expect("parse");
5541        let col_map = ds.column_map();
5542        let mut notes = Vec::new();
5543        let terms = build_termspec(
5544            &parsed.terms,
5545            &ds,
5546            &col_map,
5547            &mut notes,
5548            &gam_runtime::resource::ResourcePolicy::default_library(),
5549        )
5550        .expect("build hybrid duchon termspec");
5551        let SmoothBasisSpec::Duchon { spec, .. } = &terms.smooth_terms[0].basis else {
5552            panic!("expected Duchon term");
5553        };
5554        assert_eq!(spec.length_scale, Some(0.25));
5555    }
5556
5557    #[test]
5558    fn parse_matern_nu_accepts_equivalent_half_integer_forms() {
5559        let cases = [
5560            ("1/2", MaternNu::Half),
5561            (" 1 / 2 ", MaternNu::Half),
5562            (".5", MaternNu::Half),
5563            ("0.50", MaternNu::Half),
5564            ("half", MaternNu::Half),
5565            ("3 / 2", MaternNu::ThreeHalves),
5566            ("1.50", MaternNu::ThreeHalves),
5567            ("5 / 2", MaternNu::FiveHalves),
5568            ("2.500000000000", MaternNu::FiveHalves),
5569            ("7 / 2", MaternNu::SevenHalves),
5570            ("3.50", MaternNu::SevenHalves),
5571            ("9 / 2", MaternNu::NineHalves),
5572            ("4.50", MaternNu::NineHalves),
5573        ];
5574        for (raw, expected) in cases {
5575            let parsed = parse_matern_nu(raw).expect(raw);
5576            assert!(
5577                matches!(
5578                    (parsed, expected),
5579                    (MaternNu::Half, MaternNu::Half)
5580                        | (MaternNu::ThreeHalves, MaternNu::ThreeHalves)
5581                        | (MaternNu::FiveHalves, MaternNu::FiveHalves)
5582                        | (MaternNu::SevenHalves, MaternNu::SevenHalves)
5583                        | (MaternNu::NineHalves, MaternNu::NineHalves)
5584                ),
5585                "parsed {raw:?} as {parsed:?}, expected {expected:?}"
5586            );
5587        }
5588    }
5589
5590    #[test]
5591    fn parse_matern_nu_rejects_unsupported_or_invalid_values() {
5592        for raw in ["1", "2", "11/2", "1/0", "nan", "fast"] {
5593            let err = parse_matern_nu(raw).expect_err(raw);
5594            assert!(
5595                err.contains("supported half-integer values"),
5596                "unexpected error for {raw:?}: {err}"
5597            );
5598        }
5599    }
5600
5601    #[test]
5602    fn parse_ps_k_promotes_underexpressive_cubic_basis() {
5603        let mut opts = BTreeMap::new();
5604        opts.insert("k".to_string(), "4".to_string());
5605        let (internal, inferred, eff_degree) = parse_ps_internal_knots(&opts, 3, 20).expect("k=4");
5606        assert_eq!(internal, 2);
5607        assert_eq!(eff_degree, 3);
5608        assert!(!inferred);
5609
5610        opts.insert("k".to_string(), "6".to_string());
5611        let (internal, inferred, eff_degree) = parse_ps_internal_knots(&opts, 3, 20).expect("k=6");
5612        assert_eq!(internal, 2);
5613        assert_eq!(eff_degree, 3);
5614        assert!(!inferred);
5615
5616        opts.insert("k".to_string(), "10".to_string());
5617        let (internal, inferred, eff_degree) = parse_ps_internal_knots(&opts, 3, 20).expect("k=10");
5618        assert_eq!(internal, 6);
5619        assert_eq!(eff_degree, 3);
5620        assert!(!inferred);
5621    }
5622
5623    #[test]
5624    fn parse_ps_internal_knots_drops_degree_for_small_k() {
5625        // mgcv's `s(x, bs="ps", k=3)` with the default cubic basis silently
5626        // reduces to a quadratic (`degree=2`) marginal. `k=3, degree=3`
5627        // should yield a quadratic basis with zero internal knots
5628        // (`num_basis = k = 3`).
5629        let mut opts = BTreeMap::new();
5630        opts.insert("k".to_string(), "3".to_string());
5631        let (internal, inferred, eff_degree) = parse_ps_internal_knots(&opts, 3, 20).expect("k=3");
5632        assert_eq!(eff_degree, 2);
5633        assert_eq!(internal, 0);
5634        assert!(!inferred);
5635
5636        // `k=2` reduces to a linear (`degree=1`) marginal — the smallest
5637        // non-trivial spline basis.
5638        opts.insert("k".to_string(), "2".to_string());
5639        let (internal, inferred, eff_degree) = parse_ps_internal_knots(&opts, 3, 20).expect("k=2");
5640        assert_eq!(eff_degree, 1);
5641        assert_eq!(internal, 0);
5642        assert!(!inferred);
5643
5644        // The under-2 case is structurally under-specified and rejected even
5645        // by the degree-reducing variant: no B-spline basis has fewer than
5646        // two functions.
5647        opts.insert("k".to_string(), "1".to_string());
5648        let err = parse_ps_internal_knots(&opts, 3, 20)
5649            .expect_err("k=1 is below the irreducible spline floor");
5650        assert!(err.contains("requires k >= 2"), "unexpected error: {err}");
5651
5652        // When the user already passed `k >= degree+1`, the helper must
5653        // preserve the existing knot geometry exactly.
5654        opts.insert("k".to_string(), "4".to_string());
5655        let (internal, inferred, eff_degree) = parse_ps_internal_knots(&opts, 3, 20).expect("k=4");
5656        assert_eq!(eff_degree, 3);
5657        assert_eq!(internal, 2);
5658        assert!(!inferred);
5659    }
5660
5661    #[test]
5662    fn factor_smooth_marginal_degree_reduces_for_small_k() {
5663        let ds = factor_dataset();
5664        let col_map = ds.column_map();
5665
5666        for (k, expected_degree) in [(3usize, 2usize), (2usize, 1usize)] {
5667            let parsed =
5668                parse_formula(&format!("y ~ s(x, g, bs=fs, k={k})")).expect("parse factor smooth");
5669            let mut notes = Vec::new();
5670            let terms = build_termspec(
5671                &parsed.terms,
5672                &ds,
5673                &col_map,
5674                &mut notes,
5675                &gam_runtime::resource::ResourcePolicy::default_library(),
5676            )
5677            .unwrap_or_else(|err| panic!("fs k={k} should degree-reduce, got: {err:?}"));
5678            let SmoothBasisSpec::FactorSmooth { spec } = &terms.smooth_terms[0].basis else {
5679                panic!(
5680                    "expected factor smooth, got {:?}",
5681                    terms.smooth_terms[0].basis
5682                );
5683            };
5684            assert_eq!(spec.marginal.degree, expected_degree);
5685            assert!(
5686                spec.marginal.penalty_order <= spec.marginal.degree,
5687                "penalty_order {} must be clamped to degree {}",
5688                spec.marginal.penalty_order,
5689                spec.marginal.degree
5690            );
5691            let basis_size = match spec.marginal.knotspec {
5692                BSplineKnotSpec::Generate {
5693                    num_internal_knots, ..
5694                } => num_internal_knots + spec.marginal.degree + 1,
5695                BSplineKnotSpec::Automatic {
5696                    num_internal_knots: Some(num_internal_knots),
5697                    ..
5698                } => num_internal_knots + spec.marginal.degree + 1,
5699                ref other => panic!("unexpected factor-smooth knotspec: {other:?}"),
5700            };
5701            assert_eq!(basis_size, k);
5702        }
5703    }
5704
5705    /// Build a dataset with a ternary continuous covariate `x ∈ {0,1,2}` and a
5706    /// 2-level categorical group `g`, for the low-cardinality cr-cap tests.
5707    fn ternary_factor_dataset() -> Dataset {
5708        let rows = (0..120)
5709            .map(|i| {
5710                let x = (i % 3) as f64;
5711                let g = (i % 2) as f64;
5712                vec![x + g, x, g]
5713            })
5714            .collect::<Vec<_>>();
5715        Dataset {
5716            headers: vec!["y".into(), "x".into(), "g".into()],
5717            values: Array2::from_shape_vec(
5718                (rows.len(), 3),
5719                rows.into_iter().flat_map(|row| row.into_iter()).collect(),
5720            )
5721            .expect("rectangular ternary factor test data"),
5722            schema: DataSchema {
5723                columns: vec![
5724                    SchemaColumn {
5725                        name: "y".into(),
5726                        kind: ColumnKindTag::Continuous,
5727                        levels: vec![],
5728                    },
5729                    SchemaColumn {
5730                        name: "x".into(),
5731                        kind: ColumnKindTag::Continuous,
5732                        levels: vec![],
5733                    },
5734                    SchemaColumn {
5735                        name: "g".into(),
5736                        kind: ColumnKindTag::Categorical,
5737                        levels: vec!["a".into(), "b".into()],
5738                    },
5739                ],
5740            },
5741            column_kinds: vec![
5742                ColumnKindTag::Continuous,
5743                ColumnKindTag::Continuous,
5744                ColumnKindTag::Categorical,
5745            ],
5746        }
5747    }
5748
5749    #[test]
5750    fn univariate_cr_smooth_caps_knots_to_data_support() {
5751        // #1541: `s(x, bs=cr, k=10)` on a ternary covariate (3 distinct values)
5752        // must NOT hard-fail in cr-knot selection ("cubic regression spline with
5753        // k=10 requires at least 10 distinct values, got 3"). The cr basis is
5754        // capped to the data support — exactly 3 value-knots at {0,1,2} — which
5755        // is full-rank for the data, so it can still represent any 3 group means.
5756        let ds = continuous_dataset(
5757            &["y", "x"],
5758            (0..90)
5759                .map(|i| vec![(i % 3) as f64, (i % 3) as f64])
5760                .collect(),
5761        );
5762        let col_map = ds.column_map();
5763        let parsed = parse_formula("y ~ s(x, bs=cr, k=10)").expect("parse cr smooth");
5764        let mut notes = Vec::new();
5765        let terms = build_termspec(
5766            &parsed.terms,
5767            &ds,
5768            &col_map,
5769            &mut notes,
5770            &gam_runtime::resource::ResourcePolicy::default_library(),
5771        )
5772        .expect("cr k=10 must cap to data support instead of erroring");
5773        let SmoothBasisSpec::BSpline1D { spec, .. } = &terms.smooth_terms[0].basis else {
5774            panic!("expected BSpline1D for s(x, bs=cr)");
5775        };
5776        let BSplineKnotSpec::NaturalCubicRegression { knots } = &spec.knotspec else {
5777            panic!("expected cr knotspec, got {:?}", spec.knotspec);
5778        };
5779        // Capped to exactly the 3 distinct covariate values.
5780        assert_eq!(knots.len(), 3, "cr basis not capped to 3 distinct values");
5781        assert_eq!(knots.as_slice().unwrap(), &[0.0, 1.0, 2.0]);
5782        // The reduction is surfaced to the user (mgcv warns in the same case).
5783        assert!(
5784            notes.iter().any(|n| n.contains("data-support cap")),
5785            "cap not reported in inference notes: {notes:?}"
5786        );
5787    }
5788
5789    #[test]
5790    fn univariate_cr_smooth_binary_covariate_degrades_to_bspline() {
5791        // #1541: a BINARY covariate has too few distinct values (2) for ANY cr
5792        // spline (needs >= 3 distinct). `s(x, bs=cr)` must degrade to a B-spline
5793        // marginal — the default basis the same data already fits — NOT hard-fail.
5794        let ds = continuous_dataset(
5795            &["y", "x"],
5796            (0..80)
5797                .map(|i| vec![(i % 2) as f64, (i % 2) as f64])
5798                .collect(),
5799        );
5800        let col_map = ds.column_map();
5801        let parsed = parse_formula("y ~ s(x, bs=cr, k=10)").expect("parse cr smooth");
5802        let mut notes = Vec::new();
5803        let terms = build_termspec(
5804            &parsed.terms,
5805            &ds,
5806            &col_map,
5807            &mut notes,
5808            &gam_runtime::resource::ResourcePolicy::default_library(),
5809        )
5810        .expect("binary cr must degrade to B-spline instead of erroring");
5811        let SmoothBasisSpec::BSpline1D { spec, .. } = &terms.smooth_terms[0].basis else {
5812            panic!("expected BSpline1D for s(x, bs=cr)");
5813        };
5814        assert!(
5815            !matches!(
5816                spec.knotspec,
5817                BSplineKnotSpec::NaturalCubicRegression { .. }
5818            ),
5819            "binary covariate must NOT build a cr basis, got {:?}",
5820            spec.knotspec
5821        );
5822        assert!(
5823            notes
5824                .iter()
5825                .any(|n| n.contains("Degraded to the linear B-spline")),
5826            "degradation not reported in inference notes: {notes:?}"
5827        );
5828    }
5829
5830    #[test]
5831    fn sz_factor_smooth_low_cardinality_uses_bspline_marginal() {
5832        // #1605: the `sz` factor-smooth marginal is the SAME penalized B-spline
5833        // the `fs` sibling uses — NOT a natural cubic regression (`cr`) marginal,
5834        // whose hard natural boundary conditions f''=0 bias curved deviations
5835        // (a consistency failure). #1542 (the reason this test exists) is
5836        // subsumed: with a B-spline marginal a low-cardinality covariate no
5837        // longer needs a special cr data-support cap and can never hard-fail the
5838        // way the old cr-marginal `sz` spelling did — the build just succeeds,
5839        // exactly as `fs` already does on the identical data.
5840        let ds = ternary_factor_dataset();
5841        let col_map = ds.column_map();
5842        let parsed = parse_formula("y ~ s(x, g, bs=sz, k=10)").expect("parse sz factor smooth");
5843        let mut notes = Vec::new();
5844        let terms = build_termspec(
5845            &parsed.terms,
5846            &ds,
5847            &col_map,
5848            &mut notes,
5849            &gam_runtime::resource::ResourcePolicy::default_library(),
5850        )
5851        .expect("sz on a ternary covariate must build (B-spline marginal), not hard-fail");
5852        let SmoothBasisSpec::FactorSmooth { spec } = &terms.smooth_terms[0].basis else {
5853            panic!("expected FactorSmooth for s(x, g, bs=sz)");
5854        };
5855        assert!(
5856            !matches!(
5857                spec.marginal.knotspec,
5858                BSplineKnotSpec::NaturalCubicRegression { .. }
5859            ),
5860            "sz marginal must be a B-spline (curvature-capable), not the \
5861             natural-BC cr basis; got {:?}",
5862            spec.marginal.knotspec
5863        );
5864    }
5865
5866    /// A dataset with a genuinely continuous covariate `x` (many distinct
5867    /// values) and a `L`-level grouping factor `g`, suitable for building a
5868    /// real factor-smooth marginal with a non-trivial {const, linear} null
5869    /// space. `y` is unused by the structural penalty checks below.
5870    fn continuous_x_factor_dataset(n: usize, n_groups: usize) -> Dataset {
5871        let rows = (0..n)
5872            .map(|i| {
5873                let x = i as f64 / (n as f64 - 1.0);
5874                let g = (i % n_groups) as f64;
5875                vec![x + g, x, g]
5876            })
5877            .collect::<Vec<_>>();
5878        let levels: Vec<String> = (0..n_groups).map(|k| format!("g{k}")).collect();
5879        Dataset {
5880            headers: vec!["y".into(), "x".into(), "g".into()],
5881            values: Array2::from_shape_vec(
5882                (rows.len(), 3),
5883                rows.into_iter().flat_map(|row| row.into_iter()).collect(),
5884            )
5885            .expect("rectangular continuous-x factor data"),
5886            schema: DataSchema {
5887                columns: vec![
5888                    SchemaColumn {
5889                        name: "y".into(),
5890                        kind: ColumnKindTag::Continuous,
5891                        levels: vec![],
5892                    },
5893                    SchemaColumn {
5894                        name: "x".into(),
5895                        kind: ColumnKindTag::Continuous,
5896                        levels: vec![],
5897                    },
5898                    SchemaColumn {
5899                        name: "g".into(),
5900                        kind: ColumnKindTag::Categorical,
5901                        levels,
5902                    },
5903                ],
5904            },
5905            column_kinds: vec![
5906                ColumnKindTag::Continuous,
5907                ColumnKindTag::Continuous,
5908                ColumnKindTag::Categorical,
5909            ],
5910        }
5911    }
5912
5913    fn factor_smooth_spec_for(formula: &str, ds: &Dataset) -> FactorSmoothSpec {
5914        let col_map = ds.column_map();
5915        let parsed = parse_formula(formula).expect("parse factor smooth formula");
5916        let mut notes = Vec::new();
5917        let terms = build_termspec(
5918            &parsed.terms,
5919            ds,
5920            &col_map,
5921            &mut notes,
5922            &gam_runtime::resource::ResourcePolicy::default_library(),
5923        )
5924        .expect("build factor smooth term");
5925        let SmoothBasisSpec::FactorSmooth { spec } = &terms.smooth_terms[0].basis else {
5926            panic!("expected FactorSmooth basis for `{formula}`");
5927        };
5928        spec.clone()
5929    }
5930
5931    /// #1605: the sum-to-zero factor smooth `s(x, g, bs="sz")` under-fit data
5932    /// drawn from its own model class because its deviation blocks carried ONLY
5933    /// the marginal wiggliness penalty — the {const, linear} null space of every
5934    /// deviation curve was left completely unpenalized, so the single combined
5935    /// wiggliness λ could not separate per-group intercept/slope variance from
5936    /// curvature variance and REML parked it over-smoothed (same defect class as
5937    /// the closed #700, more severe). mgcv's `bs="fs"` sibling avoids the gap by
5938    /// adding a SEPARATE per-null-dimension ridge (one λ each), the
5939    /// double-penalty `I_L ⊗ S_j` structure. The fix gives `sz` the same
5940    /// null-space-ridge structure, mapped into the zero-sum CONTRAST space so the
5941    /// constraint (and `sz`'s distinctness from `fs`) is preserved.
5942    ///
5943    /// This pins the structural defect: after the fix the `sz` deviation build
5944    /// must carry MORE than just its wiggliness penalty(s) — exactly one extra
5945    /// null-space-ridge penalty per marginal null direction, matching the count
5946    /// that `fs` carries — while keeping the narrower `(L-1)·p` zero-sum design
5947    /// (NOT the `L·p` full-rank `fs` design). Before the fix `sz` carried only
5948    /// the wiggliness penalties and this fails.
5949    #[test]
5950    fn sz_factor_smooth_carries_null_space_ridge_like_fs() {
5951        let ds = continuous_x_factor_dataset(180, 4);
5952        let mut workspace = crate::basis::BasisWorkspace::new();
5953
5954        let sz_spec = factor_smooth_spec_for("y ~ s(x, g, bs=sz, k=8)", &ds);
5955        let sz_built = crate::smooth::build_factor_smooth(
5956            ds.values.view(),
5957            &sz_spec,
5958            "sz_term",
5959            &mut workspace,
5960        )
5961        .expect("build sz factor smooth");
5962
5963        let fs_spec = factor_smooth_spec_for("y ~ s(x, g, bs=fs, k=8)", &ds);
5964        let fs_built = crate::smooth::build_factor_smooth(
5965            ds.values.view(),
5966            &fs_spec,
5967            "fs_term",
5968            &mut workspace,
5969        )
5970        .expect("build fs factor smooth");
5971
5972        // Penalty structure (#1074 + #1605). `fs` is the exchangeable
5973        // random-effect smooth: all `L` level blocks share ONE wiggliness λ per
5974        // marginal penalty, plus one rank-1 null-space ridge per marginal null
5975        // direction (the #1605 double penalty). `sz` is the sum-to-zero factor
5976        // smooth and mgcv's `smooth.construct.sz` emits ONE penalty matrix PER
5977        // LEVEL — `L` independent curvature smoothing parameters — so REML can
5978        // shrink a low-amplitude group's deviation hard while leaving a busy
5979        // group nearly unpenalized. We mirror that: the single marginal
5980        // wiggliness penalty is split into its `L` independent zero-sum-contrast
5981        // summands (`L-1` free per-group blocks `(e_k e_kᵀ)⊗S` + the reference
5982        // coupling block `(11ᵀ)⊗S`), each carrying its own λ, and the null-space
5983        // ridges stay POOLED (the per-group intercept/slope shrinkage mgcv pools
5984        // under one variance even for `sz`).
5985        //
5986        // So with `nw` marginal wiggliness penalties and `nn` marginal null
5987        // directions: fs has `nw + nn` penalties; sz has `L·nw + nn`. sz must
5988        // therefore carry strictly MORE penalties than fs (the per-group split),
5989        // and the surplus must be exactly `(L-1)·nw`.
5990        let n_levels = sz_spec
5991            .group_frozen_levels
5992            .as_ref()
5993            .map(|l| l.len())
5994            .unwrap_or(4);
5995        assert!(n_levels >= 3, "test needs >=3 groups, got {n_levels}");
5996
5997        // fs = nw + nn  ⇒  nn = fs_penalties - nw. The marginal has nw==1
5998        // wiggliness penalty (a single difference/curvature operator), so the
5999        // per-group split adds exactly (L-1)·nw = (L-1) extra penalties on top of
6000        // fs's count.
6001        let nw = 1usize; // one marginal wiggliness penalty for the B-spline marginal
6002        let expected_sz = fs_built.penalties.len() + (n_levels - 1) * nw;
6003        assert_eq!(
6004            sz_built.penalties.len(),
6005            expected_sz,
6006            "sz must split its wiggliness penalty per level (#1074): expected \
6007             fs_count {} + (L-1)·nw {} = {}, but sz had {}",
6008            fs_built.penalties.len(),
6009            (n_levels - 1) * nw,
6010            expected_sz,
6011            sz_built.penalties.len(),
6012        );
6013        assert!(
6014            sz_built.penalties.len() > fs_built.penalties.len(),
6015            "sz must carry strictly more penalties than fs after the per-group \
6016             split (sz={}, fs={})",
6017            sz_built.penalties.len(),
6018            fs_built.penalties.len(),
6019        );
6020
6021        // The null-space ridges must still be present (the #1605 property that
6022        // keeps the deviation curvature un-over-smoothed). After removing the `L`
6023        // per-group wiggliness blocks, the remainder are the pooled null ridges,
6024        // and there must be at least one (a B-spline marginal has a non-empty
6025        // {const, linear} null space).
6026        let n_wiggliness = n_levels * nw; // L per-group blocks
6027        assert!(
6028            sz_built.penalties.len() > n_wiggliness,
6029            "sz deviation block carries no null-space ridge (penalties={}, \
6030             wiggliness blocks={}); the null space is unpenalized and REML \
6031             over-smooths the deviations",
6032            sz_built.penalties.len(),
6033            n_wiggliness,
6034        );
6035
6036        // The zero-sum constraint must be preserved: the sz design must stay the
6037        // NARROWER `(L-1)·p` contrast design, strictly narrower than the fs
6038        // full-rank `L·p` design. This guards against "fixing" sz by making it
6039        // identical to fs (which would break identifiability / sum-to-zero).
6040        assert!(
6041            sz_built.dim < fs_built.dim,
6042            "sz design width {} must be strictly less than fs width {} \
6043             (zero-sum contrast drops one level block)",
6044            sz_built.dim,
6045            fs_built.dim,
6046        );
6047
6048        // Every penalty/metadata vector must stay parallel (length invariant the
6049        // downstream REML assembly relies on).
6050        assert_eq!(sz_built.penalties.len(), sz_built.nullspaces.len());
6051        assert_eq!(sz_built.penalties.len(), sz_built.penaltyinfo.len());
6052        assert_eq!(sz_built.penalties.len(), sz_built.null_eigenvectors.len());
6053    }
6054
6055    /// #1457: `y ~ s(x, by=g) + g` with a BARE categorical `g` must NOT lower to
6056    /// two `g` design blocks. The bare `+ g` is auto-promoted to a single
6057    /// penalized random-effect block owning the factor's full level offsets; the
6058    /// `by=` branch must then recognize that owner and skip adding its own
6059    /// unpenalized treatment-coded main effect. Before the fix the dedup guard
6060    /// recognized only explicit `group(g)` (a `ParsedTerm::RandomEffect`), so the
6061    /// auto-promoted bare-`+ g` block slipped past and a spurious second `g`
6062    /// block (plus an extra smoothing parameter) was added. Assert exactly ONE
6063    /// `g` random/categorical block, and that adding the bare `+ g` introduces no
6064    /// extra `g` blocks beyond `y ~ s(x, by=g)` alone.
6065    fn factor_dataset_l3() -> Dataset {
6066        // `g` is categorical with THREE levels (encoded 0.0/1.0/2.0).
6067        let rows = (0..30)
6068            .map(|i| {
6069                let x = i as f64 / 29.0;
6070                let g = (i % 3) as f64;
6071                vec![x + g, x, g]
6072            })
6073            .collect::<Vec<_>>();
6074        Dataset {
6075            headers: vec!["y".into(), "x".into(), "g".into()],
6076            values: Array2::from_shape_vec(
6077                (rows.len(), 3),
6078                rows.into_iter().flat_map(|row| row.into_iter()).collect(),
6079            )
6080            .expect("rectangular L=3 factor test data"),
6081            schema: DataSchema {
6082                columns: vec![
6083                    SchemaColumn {
6084                        name: "y".into(),
6085                        kind: ColumnKindTag::Continuous,
6086                        levels: vec![],
6087                    },
6088                    SchemaColumn {
6089                        name: "x".into(),
6090                        kind: ColumnKindTag::Continuous,
6091                        levels: vec![],
6092                    },
6093                    SchemaColumn {
6094                        name: "g".into(),
6095                        kind: ColumnKindTag::Categorical,
6096                        levels: vec!["a".into(), "b".into(), "c".into()],
6097                    },
6098                ],
6099            },
6100            column_kinds: vec![
6101                ColumnKindTag::Continuous,
6102                ColumnKindTag::Continuous,
6103                ColumnKindTag::Categorical,
6104            ],
6105        }
6106    }
6107
6108    #[test]
6109    fn factor_by_smooth_plus_bare_categorical_does_not_duplicate_factor_block() {
6110        let ds = factor_dataset_l3();
6111        let col_map = ds.column_map();
6112
6113        let g_blocks = |formula: &str| -> usize {
6114            let parsed = parse_formula(formula).expect("parse by-smooth formula");
6115            let mut notes = Vec::new();
6116            let terms = build_termspec(
6117                &parsed.terms,
6118                &ds,
6119                &col_map,
6120                &mut notes,
6121                &ResourcePolicy::default_library(),
6122            )
6123            .unwrap_or_else(|err| panic!("`{formula}` must build, got: {err:?}"));
6124            terms
6125                .random_effect_terms
6126                .iter()
6127                .filter(|rt| rt.name == "g")
6128                .count()
6129        };
6130
6131        // Baseline: the standalone factor-by smooth carries exactly ONE `g`
6132        // block (the unpenalized treatment-coded factor main effect added by the
6133        // `by=` branch).
6134        let by_only = g_blocks("y ~ s(x, by=g, k=10)");
6135        assert_eq!(
6136            by_only, 1,
6137            "`y ~ s(x, by=g)` must produce exactly one `g` design block"
6138        );
6139
6140        // The bug: adding a bare `+ g` (auto-promoted to a penalized random
6141        // block owning the same level offsets) must NOT introduce a second `g`
6142        // block. Before the fix this was 2.
6143        let by_plus_bare = g_blocks("y ~ s(x, by=g, k=10) + g");
6144        assert_eq!(
6145            by_plus_bare, 1,
6146            "`y ~ s(x, by=g) + g` must collapse to ONE `g` block (#1457): the bare \
6147             `+ g` already owns the factor's level offsets, so the `by=` branch \
6148             must not add a second, treatment-coded main effect"
6149        );
6150
6151        // The bare `+ g` adds no spurious extra `g` block versus the baseline.
6152        assert_eq!(
6153            by_plus_bare, by_only,
6154            "the bare `+ g` collision must add zero extra `g` blocks (#1457)"
6155        );
6156    }
6157
6158    #[test]
6159    fn parse_tensor_periods_and_origins_aliases() {
6160        let mut opts = BTreeMap::new();
6161        opts.insert(
6162            "boundary".to_string(),
6163            "['periodic', 'periodic']".to_string(),
6164        );
6165        opts.insert("periods".to_string(), "[7, 24]".to_string());
6166        opts.insert("origins".to_string(), "[0, -12]".to_string());
6167        let axes = parse_periodic_axes(&opts, 2).expect("axes");
6168        let periods = parse_periods(&opts, &axes).expect("periods");
6169        let origins = parse_period_origins(&opts, &axes).expect("origins");
6170        assert_eq!(axes, vec![true, true]);
6171        assert_eq!(periods, vec![Some(7.0), Some(24.0)]);
6172        assert_eq!(origins, vec![Some(0.0), Some(-12.0)]);
6173    }
6174
6175    #[test]
6176    fn tensor_smooth_honors_per_margin_k_list() {
6177        let ds = continuous_dataset(
6178            &["y", "theta", "h"],
6179            (0..20)
6180                .map(|i| {
6181                    let theta = std::f64::consts::TAU * i as f64 / 20.0;
6182                    let h = -1.0 + 2.0 * (i % 5) as f64 / 4.0;
6183                    vec![theta.cos() + h, theta, h]
6184                })
6185                .collect(),
6186        );
6187        let parsed = parse_formula(
6188            "y ~ te(theta, h, periodic=[0], period=[2*pi, None], origin=[0, None], k=[9,5])",
6189        )
6190        .expect("parse tensor formula");
6191        let col_map = ds.column_map();
6192        let mut notes = Vec::new();
6193        let terms = build_termspec(
6194            &parsed.terms,
6195            &ds,
6196            &col_map,
6197            &mut notes,
6198            &gam_runtime::resource::ResourcePolicy::default_library(),
6199        )
6200        .expect("build tensor terms");
6201        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
6202            panic!("expected tensor B-spline");
6203        };
6204        let dims = spec
6205            .marginalspecs
6206            .iter()
6207            .map(|m| match m.knotspec {
6208                BSplineKnotSpec::PeriodicUniform { num_basis, .. } => num_basis,
6209                BSplineKnotSpec::Generate {
6210                    num_internal_knots, ..
6211                } => num_internal_knots + m.degree + 1,
6212                // The mgcv-default `cr` margin (#1074) reports its basis size as
6213                // the number of value-knots placed.
6214                BSplineKnotSpec::NaturalCubicRegression { ref knots } => knots.len(),
6215                _ => panic!("unexpected tensor marginal knotspec"),
6216            })
6217            .collect::<Vec<_>>();
6218        assert_eq!(dims, vec![9, 5]);
6219    }
6220
6221    #[test]
6222    fn tensor_smooth_honors_per_margin_k_axis_aliases() {
6223        let ds = continuous_dataset(
6224            &["resp", "x", "y"],
6225            (0..12)
6226                .map(|i| {
6227                    let t = i as f64 / 11.0;
6228                    vec![t, t, 1.0 - t]
6229                })
6230                .collect(),
6231        );
6232        assert_eq!(
6233            tensor_margin_basis_sizes(&ds, "resp ~ te(x, y, k_x=9, k_y=5)"),
6234            vec![9, 5],
6235            "k_<margin> aliases should materialize requested per-margin values"
6236        );
6237    }
6238
6239    #[test]
6240    fn tensor_smooth_low_cardinality_axis_falls_back_to_lower_degree_basis() {
6241        // mgcv-style: `te(x, b, k=c(5, 2))` with a BINARY second margin (only
6242        // values {0, 1}) is a legitimate request — the binary axis can hold at
6243        // most a 2-function linear basis. We must NOT reject k=2 with a
6244        // "k too small for degree 3" config error; instead, drop the spline
6245        // degree on the binary axis to k_axis - 1 (here 1, linear) while
6246        // keeping the continuous margin at the requested degree=3, k=5.
6247        let ds = continuous_dataset(
6248            &["y", "x", "b"],
6249            (0..40)
6250                .map(|i| {
6251                    let x = i as f64 / 39.0;
6252                    let b = (i % 2) as f64;
6253                    vec![x.sin() + 0.5 * b, x, b]
6254                })
6255                .collect(),
6256        );
6257        let parsed = parse_formula("y ~ te(x, b, k=[5, 2])").expect("parse tensor with k=[5,2]");
6258        let col_map = ds.column_map();
6259        let mut notes = Vec::new();
6260        let terms = build_termspec(
6261            &parsed.terms,
6262            &ds,
6263            &col_map,
6264            &mut notes,
6265            &gam_runtime::resource::ResourcePolicy::default_library(),
6266        )
6267        .expect("build tensor with binary margin");
6268        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
6269            panic!("expected tensor B-spline for te(x, b)");
6270        };
6271        // Continuous margin keeps requested degree=3 and k=5; binary margin
6272        // drops to degree=1 (linear) so the requested k=2 yields exactly two
6273        // basis functions before tensor-product identifiability is applied.
6274        let continuous = &spec.marginalspecs[0];
6275        let binary = &spec.marginalspecs[1];
6276        assert_eq!(continuous.degree, 3);
6277        assert_eq!(binary.degree, 1);
6278        assert!(
6279            binary.penalty_order >= 1 && binary.penalty_order <= binary.degree,
6280            "binary margin penalty_order {} must satisfy 1 <= order <= degree={}",
6281            binary.penalty_order,
6282            binary.degree
6283        );
6284        let basis_size = |m: &BSplineBasisSpec| match m.knotspec {
6285            BSplineKnotSpec::PeriodicUniform { num_basis, .. } => num_basis,
6286            BSplineKnotSpec::Generate {
6287                num_internal_knots, ..
6288            } => num_internal_knots + m.degree + 1,
6289            BSplineKnotSpec::Automatic {
6290                num_internal_knots: Some(n),
6291                ..
6292            } => n + m.degree + 1,
6293            // The mgcv-default `cr` margin (#1074) reports its basis size as the
6294            // number of value-knots placed.
6295            BSplineKnotSpec::NaturalCubicRegression { ref knots } => knots.len(),
6296            _ => panic!("unexpected tensor marginal knotspec"),
6297        };
6298        assert_eq!(basis_size(continuous), 5);
6299        assert_eq!(basis_size(binary), 2);
6300    }
6301
6302    #[test]
6303    fn tensor_smooth_uniform_k_is_capped_to_a_low_cardinality_margins_distinct_values() {
6304        // Regression: a SINGLE `k=5` applied to every axis of `te(x, b, k=5)`
6305        // with a BINARY second margin (`b ∈ {0, 1}`) must build a valid tensor,
6306        // NOT hard-fail in cr-knot selection ("cubic regression spline with k=5
6307        // requires at least 5 distinct values, got 2"). mgcv caps a margin's
6308        // basis to its data support; the binary axis becomes the 2-function
6309        // (linear) margin, while the continuous axis keeps the requested k=5.
6310        // This is the `te(age, badh, k=5)` real-data case that previously errored.
6311        let ds = continuous_dataset(
6312            &["y", "x", "b"],
6313            (0..40)
6314                .map(|i| {
6315                    let x = i as f64 / 39.0;
6316                    let b = (i % 2) as f64;
6317                    vec![x.sin() + 0.5 * b, x, b]
6318                })
6319                .collect(),
6320        );
6321        let parsed = parse_formula("y ~ te(x, b, k=5)").expect("parse tensor with uniform k=5");
6322        let col_map = ds.column_map();
6323        let mut notes = Vec::new();
6324        let terms = build_termspec(
6325            &parsed.terms,
6326            &ds,
6327            &col_map,
6328            &mut notes,
6329            &gam_runtime::resource::ResourcePolicy::default_library(),
6330        )
6331        .expect("uniform k=5 must auto-cap the binary margin instead of erroring");
6332        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
6333            panic!("expected tensor B-spline for te(x, b)");
6334        };
6335        let basis_size = |m: &BSplineBasisSpec| match &m.knotspec {
6336            BSplineKnotSpec::PeriodicUniform { num_basis, .. } => *num_basis,
6337            BSplineKnotSpec::Generate {
6338                num_internal_knots, ..
6339            } => num_internal_knots + m.degree + 1,
6340            BSplineKnotSpec::Automatic {
6341                num_internal_knots: Some(n),
6342                ..
6343            } => n + m.degree + 1,
6344            BSplineKnotSpec::NaturalCubicRegression { knots } => knots.len(),
6345            other => panic!("unexpected tensor marginal knotspec: {other:?}"),
6346        };
6347        let binary = &spec.marginalspecs[1];
6348        // Binary margin is reduced to the 2-function linear basis its data
6349        // supports (k capped from 5 to 2, degree dropped to 1).
6350        assert_eq!(basis_size(binary), 2);
6351        assert_eq!(binary.degree, 1);
6352        // The continuous margin is unaffected by the cap (40 distinct values).
6353        assert_eq!(basis_size(&spec.marginalspecs[0]), 5);
6354    }
6355
6356    #[test]
6357    fn tensor_all_tp_margins_with_per_margin_k_routes_to_bspline_tensor() {
6358        // `te(x1, x2, bs=c('tp','tp'), k=c(5,5))` is mgcv's per-margin tp tensor
6359        // with per-margin basis sizes — a tensor product of two 1-D bases, each
6360        // of dimension 5. The list-valued `k=c(5,5)` is honored by
6361        // `parse_tensor_k_list`, producing one penalized B-spline margin per axis
6362        // (each spanning the requested per-axis thin-plate function space). This
6363        // is the same anisotropic-tensor routing the scalar/no-`k` case takes —
6364        // a `te()` request is ALWAYS a tensor product, never a silent isotropic
6365        // thin-plate substitution.
6366        let ds = continuous_dataset(
6367            &["y", "x1", "x2"],
6368            (0..32)
6369                .map(|i| {
6370                    let t = i as f64 / 31.0;
6371                    vec![t.sin(), t, 1.0 - t]
6372                })
6373                .collect(),
6374        );
6375        let parsed =
6376            parse_formula("y ~ te(x1, x2, bs=c('tp','tp'), k=c(5,5))").expect("parse tensor");
6377        let col_map = ds.column_map();
6378        let mut notes = Vec::new();
6379        let terms = build_termspec(
6380            &parsed.terms,
6381            &ds,
6382            &col_map,
6383            &mut notes,
6384            &gam_runtime::resource::ResourcePolicy::default_library(),
6385        )
6386        .expect("build tensor terms with per-margin k");
6387        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
6388            panic!(
6389                "expected B-spline tensor when k=c(5,5) is supplied with bs=c('tp','tp'), got {:?}",
6390                terms.smooth_terms[0].basis
6391            );
6392        };
6393        // Since #1074 a `tp` tensor margin (k >= 3) is realized as a
6394        // Lancaster–Salkauskas natural cubic-regression margin (cr basis
6395        // dimension == knot count), not an open `Generate` B-spline. It is
6396        // still a `TensorBSpline` spec with one penalized 1-D margin per axis,
6397        // so the routing assertion above still holds; only the per-margin
6398        // knotspec variant changed. The earlier `_ => panic!` arm pinned the
6399        // pre-#1074 `Generate`-only representation and is stale. Decode every
6400        // margin variant to its basis dimension (mirroring the
6401        // `tensor_margin_basis_sizes` helper).
6402        let dims = spec
6403            .marginalspecs
6404            .iter()
6405            .map(|m| match m.knotspec {
6406                BSplineKnotSpec::Generate {
6407                    num_internal_knots, ..
6408                } => num_internal_knots + m.degree + 1,
6409                BSplineKnotSpec::Automatic {
6410                    num_internal_knots: Some(num_internal_knots),
6411                    ..
6412                } => num_internal_knots + m.degree + 1,
6413                BSplineKnotSpec::PeriodicUniform { num_basis, .. } => num_basis,
6414                BSplineKnotSpec::Provided(ref knots) => {
6415                    knots.len().saturating_sub(m.degree + 1)
6416                }
6417                BSplineKnotSpec::NaturalCubicRegression { ref knots } => knots.len(),
6418                BSplineKnotSpec::Automatic {
6419                    num_internal_knots: None,
6420                    ..
6421                } => panic!("test cannot infer automatic knot count"),
6422            })
6423            .collect::<Vec<_>>();
6424        assert_eq!(dims, vec![5, 5]);
6425    }
6426
6427    #[test]
6428    fn tensor_all_tp_margins_without_per_margin_k_builds_anisotropic_tensor() {
6429        // `te(x1, x2, bs=c('tp','tp'))` is a tensor-product request and must
6430        // build a genuine anisotropic tensor product (one smoothing parameter
6431        // per margin), NOT a silently-substituted multi-D isotropic thin-plate
6432        // radial smooth — that would be a different model (`s(x1,x2,bs='tp')`).
6433        // The routing is now consistent whether or not `k` is list-valued: a tp
6434        // margin vector always realizes each axis as a 1-D penalized B-spline
6435        // margin spanning the same per-axis thin-plate function space (#1082).
6436        let ds = continuous_dataset(
6437            &["y", "x1", "x2"],
6438            (0..32)
6439                .map(|i| {
6440                    let t = i as f64 / 31.0;
6441                    vec![t.sin(), t, 1.0 - t]
6442                })
6443                .collect(),
6444        );
6445        let parsed = parse_formula("y ~ te(x1, x2, bs=c('tp','tp'))").expect("parse tensor");
6446        let col_map = ds.column_map();
6447        let mut notes = Vec::new();
6448        let terms = build_termspec(
6449            &parsed.terms,
6450            &ds,
6451            &col_map,
6452            &mut notes,
6453            &gam_runtime::resource::ResourcePolicy::default_library(),
6454        )
6455        .expect("build tensor terms without per-margin k");
6456        let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
6457            panic!(
6458                "te(...,bs=c('tp','tp')) must route to an anisotropic tensor product, not a \
6459                 silent isotropic thin-plate substitution; got {:?}",
6460                terms.smooth_terms[0].basis
6461            );
6462        };
6463        assert_eq!(
6464            spec.marginalspecs.len(),
6465            2,
6466            "tp tensor must carry one penalized B-spline margin per axis"
6467        );
6468    }
6469
6470    #[test]
6471    fn explicit_basis_sizes_are_not_small_n_clamped() {
6472        let ds = continuous_dataset(
6473            &["y", "x1", "x2", "x3", "x4", "x5"],
6474            (0..12)
6475                .map(|i| {
6476                    let x = i as f64 / 11.0;
6477                    vec![x.sin(), x, x * x, x + 0.1, 1.0 - x, (2.0 * x).sin()]
6478                })
6479                .collect(),
6480        );
6481        let parsed = parse_formula("y ~ s(x1, k=10) + s(x2) + s(x3) + s(x4) + s(x5)")
6482            .expect("parse multi-smooth formula");
6483        let col_map = ds.column_map();
6484        let mut notes = Vec::new();
6485        let terms = build_termspec(
6486            &parsed.terms,
6487            &ds,
6488            &col_map,
6489            &mut notes,
6490            &gam_runtime::resource::ResourcePolicy::default_library(),
6491        )
6492        .expect("build multi-smooth terms");
6493        let SmoothBasisSpec::BSpline1D { spec, .. } = &terms.smooth_terms[0].basis else {
6494            panic!("expected first smooth to be B-spline");
6495        };
6496        assert!(matches!(
6497            &spec.knotspec,
6498            BSplineKnotSpec::Generate {
6499                num_internal_knots: 6,
6500                ..
6501            }
6502        ));
6503    }
6504
6505    #[test]
6506    fn explicit_duchon_centers_are_not_small_n_bumped() {
6507        let ds = continuous_dataset(
6508            &["y", "x1", "x2", "x3", "x4", "x5"],
6509            (0..12)
6510                .map(|i| {
6511                    let x = i as f64 / 11.0;
6512                    vec![x.sin(), x, x * x, x + 0.1, 1.0 - x, (2.0 * x).sin()]
6513                })
6514                .collect(),
6515        );
6516        // Pure 1D Duchon at default options resolves the nullspace to Linear
6517        // (2s < d forces escalation), giving 2 polynomial nullspace columns;
6518        // the well-posedness gate requires num_centers > polynomial_cols, so
6519        // 3 is the smallest valid count. It is still well below the small-N
6520        // bump target of polynomial_cols + 4 = 6, so this exercises the
6521        // "explicit value is honored" path the test name advertises.
6522        let parsed = parse_formula("y ~ duchon(x1, centers=3) + s(x2) + s(x3) + s(x4) + s(x5)")
6523            .expect("parse multi-smooth formula");
6524        let col_map = ds.column_map();
6525        let mut notes = Vec::new();
6526        let terms = build_termspec(
6527            &parsed.terms,
6528            &ds,
6529            &col_map,
6530            &mut notes,
6531            &gam_runtime::resource::ResourcePolicy::default_library(),
6532        )
6533        .expect("build multi-smooth terms");
6534        let SmoothBasisSpec::Duchon { spec, .. } = &terms.smooth_terms[0].basis else {
6535            panic!("expected first smooth to be Duchon");
6536        };
6537        assert!(matches!(
6538            spec.center_strategy,
6539            CenterStrategy::FarthestPoint { num_centers: 3 }
6540        ));
6541    }
6542
6543    #[test]
6544    fn inferred_tensor_basis_cap_uses_coordinate_support_not_duplicate_rows() {
6545        let mut unique_rows = Vec::new();
6546        for i in 0..50 {
6547            let theta = i as f64 / 50.0;
6548            for j in 0..16 {
6549                let h = -1.0 + 2.0 * (j as f64) / 15.0;
6550                let y = theta.cos() + h;
6551                unique_rows.push(vec![y, theta, h]);
6552            }
6553        }
6554        let mut repeated_rows = Vec::new();
6555        for _ in 0..12 {
6556            repeated_rows.extend(unique_rows.iter().cloned());
6557        }
6558
6559        let unique = continuous_dataset(&["y", "theta", "h"], unique_rows);
6560        let repeated = continuous_dataset(&["y", "theta", "h"], repeated_rows);
6561
6562        let unique_basis = inferred_tensor_basis_product(&unique);
6563        let repeated_basis = inferred_tensor_basis_product(&repeated);
6564
6565        assert_eq!(
6566            unique_basis, repeated_basis,
6567            "duplicating existing tensor coordinates must not inflate inferred basis width"
6568        );
6569    }
6570
6571    #[test]
6572    fn inferred_three_dim_tensor_basis_stays_bounded_for_reml_selection() {
6573        // Regression for gam#813: the inferred per-margin k must be
6574        // dimension-aware so the 3-D tensor width p = ∏ k_d does not explode.
6575        // With the old 1-D-per-margin rule a 3-D `te` defaulted to 7³=343 at
6576        // small n and 20³=8000 at larger n, making the (non-Kronecker-factorable)
6577        // full-tensor sum-to-zero penalty's O(p³) REML reparameterization a
6578        // multi-minute stall. The dimension-aware budget keeps the product near
6579        // mgcv's te default (≈5³=125) regardless of n.
6580        let make = |n: usize| -> usize {
6581            let mut rows = Vec::with_capacity(n);
6582            for i in 0..n {
6583                let f = i as f64 / n as f64;
6584                rows.push(vec![f.sin(), f, (2.0 * f).cos(), (3.0 * f) % 1.0]);
6585            }
6586            let ds = continuous_dataset(&["y", "x1", "x2", "x3"], rows);
6587            let parsed = parse_formula("y ~ te(x1, x2, x3)").expect("parse 3-D tensor");
6588            let col_map = ds.column_map();
6589            let mut notes = Vec::new();
6590            let terms = build_termspec(
6591                &parsed.terms,
6592                &ds,
6593                &col_map,
6594                &mut notes,
6595                &ResourcePolicy::default_library(),
6596            )
6597            .expect("build 3-D tensor termspec");
6598            let SmoothBasisSpec::TensorBSpline { spec, .. } = &terms.smooth_terms[0].basis else {
6599                panic!("expected tensor smooth");
6600            };
6601            spec.marginalspecs
6602                .iter()
6603                .map(|m| match m.knotspec {
6604                    BSplineKnotSpec::Generate {
6605                        num_internal_knots, ..
6606                    } => num_internal_knots + m.degree + 1,
6607                    BSplineKnotSpec::Automatic {
6608                        num_internal_knots: Some(num_internal_knots),
6609                        ..
6610                    } => num_internal_knots + m.degree + 1,
6611                    // The mgcv-default `cr` margin (#1074) reports its basis size
6612                    // as the number of value-knots placed.
6613                    BSplineKnotSpec::NaturalCubicRegression { ref knots } => knots.len(),
6614                    _ => panic!("unexpected tensor margin knotspec"),
6615                })
6616                .product()
6617        };
6618
6619        // n=30 (the issue's data): was 7³=343, must now be modest.
6620        assert!(
6621            make(60) <= 216,
6622            "3-D te at small n must stay near the mgcv te default, got {}",
6623            make(60)
6624        );
6625        // Larger n must NOT grow the product toward n³ (was 20³=8000).
6626        assert!(
6627            make(2000) <= 216,
6628            "3-D te at large n must not blow ∏k toward the data size, got {}",
6629            make(2000)
6630        );
6631    }
6632
6633    #[test]
6634    fn parse_bspline_boundary_conditions_and_side_selector() {
6635        // Non-zero anchors are rejected at parse time; the diagnostic must
6636        // name the side and value, which doubles as a check that the
6637        // `side=left` filter routes the global `anchor=` value to the
6638        // left endpoint (not the right).
6639        let mut opts = BTreeMap::new();
6640        opts.insert("boundary_conditions".to_string(), "anchored".to_string());
6641        opts.insert("side".to_string(), "left".to_string());
6642        opts.insert("anchor".to_string(), "2.5".to_string());
6643        let err = parse_bspline_boundary_conditions(&opts)
6644            .expect_err("non-zero left anchor must be rejected")
6645            .to_string();
6646        assert!(
6647            err.contains("left") && err.contains("2.5"),
6648            "rejection should name the affected side and value: {err}"
6649        );
6650
6651        // Side-specific aliases (`start_bc`/`end_bc`) plus the side-specific
6652        // anchor key (`right_anchor`) must funnel the value onto the right
6653        // endpoint — verified through the rejection diagnostic.
6654        let mut opts = BTreeMap::new();
6655        opts.insert("start_bc".to_string(), "clamped".to_string());
6656        opts.insert("end_bc".to_string(), "zero".to_string());
6657        opts.insert("right_anchor".to_string(), "-1.0".to_string());
6658        let err = parse_bspline_boundary_conditions(&opts)
6659            .expect_err("non-zero right anchor must be rejected")
6660            .to_string();
6661        assert!(
6662            err.contains("right") && err.contains("-1"),
6663            "rejection should name the affected side and value: {err}"
6664        );
6665
6666        // With anchors at zero the basis builder accepts the configuration,
6667        // so the same alias plumbing yields a clean `Anchored { value: 0.0 }`
6668        // on the right and `Clamped` on the left.
6669        let mut opts = BTreeMap::new();
6670        opts.insert("start_bc".to_string(), "clamped".to_string());
6671        opts.insert("end_bc".to_string(), "zero".to_string());
6672        let parsed = parse_bspline_boundary_conditions(&opts).expect("boundary conditions");
6673        assert!(matches!(
6674            parsed.left,
6675            BSplineEndpointBoundaryCondition::Clamped
6676        ));
6677        assert!(matches!(
6678            parsed.right,
6679            BSplineEndpointBoundaryCondition::Anchored { value } if value.abs() < 1e-12
6680        ));
6681    }
6682
6683    #[test]
6684    fn categorical_by_numeric_interaction_expands_treatment_coded_cells() {
6685        // `y ~ x:g` is an INTERACTION-ONLY numeric-by-factor model: there is no
6686        // `x` main effect, so the marginal parent that would identify a dropped
6687        // reference level is ABSENT. The expansion must therefore be marginality-
6688        // aware (gam#1158) and DUMMY-code `g` — keep ALL levels — yielding the
6689        // "common intercept, separate slopes" design (one x-slope column per
6690        // group). Treatment-coding here (dropping the reference level) would pin
6691        // the reference group's slope to zero, a rank-deficient fit; that wrong
6692        // behaviour is what this test now guards against. (The treatment-coded
6693        // path is exercised when the `x` parent is present — see
6694        // `categorical_by_numeric_interaction_keeps_treatment_coding_with_parent`.)
6695        let ds = factor_dataset();
6696        // `g` is categorical with two levels (encoded 0.0 → "a", 1.0 → "b").
6697        let parsed = parse_formula("y ~ x:g").expect("parse `y ~ x:g`");
6698        let col_map = ds.column_map();
6699        let mut notes = Vec::new();
6700        let terms = build_termspec(
6701            &parsed.terms,
6702            &ds,
6703            &col_map,
6704            &mut notes,
6705            &ResourcePolicy::default_library(),
6706        )
6707        .expect("factor-aware `x:g` interaction must build, not error");
6708
6709        assert_eq!(
6710            terms.linear_terms.len(),
6711            2,
6712            "interaction-only `x:g` keeps ALL factor levels (full dummy coding): one slope column per group"
6713        );
6714
6715        let x_col = *col_map.get("x").expect("x column");
6716        let g_col = *col_map.get("g").expect("g column");
6717
6718        // Both level gates must appear exactly once across the two cell columns,
6719        // and each cell carries `x` as a product factor (not a raw column for g).
6720        let mut seen_bits = std::collections::HashSet::new();
6721        for term in &terms.linear_terms {
6722            assert!(
6723                term.is_interaction(),
6724                "the categorical-by-numeric cell is a Wilkinson-Rogers interaction"
6725            );
6726            assert_eq!(term.feature_cols, vec![x_col]);
6727            assert_eq!(term.categorical_levels.len(), 1);
6728            let (gate_col, gate_bits) = term.categorical_levels[0];
6729            assert_eq!(gate_col, g_col);
6730            assert!(seen_bits.insert(gate_bits), "each level appears once");
6731
6732            // Realize and check it equals `1[g == gate_bits] * x` row by row.
6733            let column = term
6734                .realized_design_column(ds.values.view())
6735                .expect("realize cell column");
6736            let n = ds.values.nrows();
6737            assert_eq!(column.len(), n);
6738            for row in 0..n {
6739                let x = ds.values[[row, x_col]];
6740                let g = ds.values[[row, g_col]];
6741                let expected = if g.to_bits() == gate_bits { x } else { 0.0 };
6742                assert!(
6743                    (column[row] - expected).abs() < 1e-12,
6744                    "row {row}: g={g}, x={x}, expected {expected}, got {}",
6745                    column[row]
6746                );
6747            }
6748        }
6749        // Both the reference level "a" (0.0) and the non-reference "b" (1.0) are
6750        // kept — the reference level is NOT dropped in the interaction-only form.
6751        assert!(seen_bits.contains(&0.0_f64.to_bits()));
6752        assert!(seen_bits.contains(&1.0_f64.to_bits()));
6753    }
6754
6755    #[test]
6756    fn categorical_by_numeric_interaction_keeps_treatment_coding_with_parent() {
6757        // With the `x` main effect PRESENT (`y ~ x + x:g`), the marginal parent
6758        // that identifies a dropped reference level exists, so `x:g` keeps its
6759        // historical treatment coding: the reference level "a" is dropped and
6760        // only the non-reference slope-deviation column for "b" is emitted. This
6761        // guards that the marginality-aware fix (gam#1158) does NOT regress the
6762        // parent-present form, which must stay column-space-identical to mgcv's
6763        // `x + x:g`.
6764        let ds = factor_dataset();
6765        let parsed = parse_formula("y ~ x + x:g").expect("parse `y ~ x + x:g`");
6766        let col_map = ds.column_map();
6767        let mut notes = Vec::new();
6768        let terms = build_termspec(
6769            &parsed.terms,
6770            &ds,
6771            &col_map,
6772            &mut notes,
6773            &ResourcePolicy::default_library(),
6774        )
6775        .expect("`x + x:g` must build");
6776
6777        // One main-effect `x` column plus one treatment-coded interaction cell.
6778        let x_col = *col_map.get("x").expect("x column");
6779        let g_col = *col_map.get("g").expect("g column");
6780        let interaction_cells: Vec<_> = terms
6781            .linear_terms
6782            .iter()
6783            .filter(|t| t.is_interaction())
6784            .collect();
6785        assert_eq!(
6786            interaction_cells.len(),
6787            1,
6788            "with `x` present, `x:g` is treatment-coded → one cell (reference dropped)"
6789        );
6790        let term = interaction_cells[0];
6791        assert_eq!(term.feature_cols, vec![x_col]);
6792        assert_eq!(term.categorical_levels.len(), 1);
6793        let (gate_col, gate_bits) = term.categorical_levels[0];
6794        assert_eq!(gate_col, g_col);
6795        // The dropped reference is "a" (0.0); the kept gate is "b" (1.0).
6796        assert_eq!(gate_bits, 1.0_f64.to_bits());
6797    }
6798
6799    #[test]
6800    fn categorical_by_categorical_interaction_expands_full_cross_cells() {
6801        // `y ~ f:g` is an INTERACTION-ONLY factor-by-factor model: neither `f`
6802        // nor `g` appears as a main effect, so neither marginal parent is
6803        // present and BOTH factors must be dummy-coded (gam#1159). The correct
6804        // design is the SATURATED cell-means model: the full cross of ALL levels
6805        // (3 * 2 = 6 cells) minus ONE reference cell (the lexicographically-first
6806        // level of every factor, here f0:g0) absorbed by the intercept — rank
6807        // 6-1 = 5 cell columns + intercept, column-space-identical to `f*g`.
6808        // Treatment-coding both factors (the old behaviour) kept only
6809        // (3-1)*(2-1) = 2 cells and collapsed the rest onto the intercept, a
6810        // rank-deficient fit; that is the bug this test now guards against.
6811        let n = 30usize;
6812        let mut rows = Vec::with_capacity(n);
6813        for i in 0..n {
6814            let y = (i as f64).sin();
6815            let f = (i % 3) as f64; // 3 levels: 0,1,2
6816            let g = (i % 2) as f64; // 2 levels: 0,1
6817            rows.push(vec![y, f, g]);
6818        }
6819        let values = Array2::from_shape_vec(
6820            (n, 3),
6821            rows.into_iter().flat_map(|row| row.into_iter()).collect(),
6822        )
6823        .expect("rectangular cross-factor data");
6824        let ds = Dataset {
6825            headers: vec!["y".into(), "f".into(), "g".into()],
6826            values,
6827            schema: DataSchema {
6828                columns: vec![
6829                    SchemaColumn {
6830                        name: "y".into(),
6831                        kind: ColumnKindTag::Continuous,
6832                        levels: vec![],
6833                    },
6834                    SchemaColumn {
6835                        name: "f".into(),
6836                        kind: ColumnKindTag::Categorical,
6837                        levels: vec!["f0".into(), "f1".into(), "f2".into()],
6838                    },
6839                    SchemaColumn {
6840                        name: "g".into(),
6841                        kind: ColumnKindTag::Categorical,
6842                        levels: vec!["g0".into(), "g1".into()],
6843                    },
6844                ],
6845            },
6846            column_kinds: vec![
6847                ColumnKindTag::Continuous,
6848                ColumnKindTag::Categorical,
6849                ColumnKindTag::Categorical,
6850            ],
6851        };
6852
6853        let parsed = parse_formula("y ~ f:g").expect("parse `y ~ f:g`");
6854        let col_map = ds.column_map();
6855        let mut notes = Vec::new();
6856        let terms = build_termspec(
6857            &parsed.terms,
6858            &ds,
6859            &col_map,
6860            &mut notes,
6861            &ResourcePolicy::default_library(),
6862        )
6863        .expect("factor-by-factor `f:g` interaction must build, not error");
6864
6865        assert_eq!(
6866            terms.linear_terms.len(),
6867            5,
6868            "saturated 3*2 = 6 cross cells minus one reference cell (f0:g0) = 5"
6869        );
6870
6871        let f_col = *col_map.get("f").expect("f column");
6872        let g_col = *col_map.get("g").expect("g column");
6873        // The dropped reference cell pairs each factor's lexicographically-first
6874        // level: f0 (0.0) and g0 (0.0). It must NOT appear among the emitted
6875        // cells; every OTHER cross cell must.
6876        let f0 = 0.0_f64.to_bits();
6877        let g0 = 0.0_f64.to_bits();
6878        let mut emitted = std::collections::HashSet::new();
6879        for term in &terms.linear_terms {
6880            // No numeric operand: the realized column is a pure cell indicator.
6881            assert!(term.feature_cols.is_empty());
6882            assert_eq!(term.categorical_levels.len(), 2);
6883            let mut gates = std::collections::HashMap::new();
6884            for &(col, bits) in &term.categorical_levels {
6885                gates.insert(col, bits);
6886            }
6887            let f_bits = *gates.get(&f_col).expect("f gate present");
6888            let g_bits = *gates.get(&g_col).expect("g gate present");
6889            // The reference cell f0:g0 must have been dropped.
6890            assert!(
6891                !(f_bits == f0 && g_bits == g0),
6892                "the reference cell f0:g0 must be absorbed by the intercept, not emitted"
6893            );
6894            emitted.insert((f_bits, g_bits));
6895
6896            let column = term
6897                .realized_design_column(ds.values.view())
6898                .expect("realize cross cell");
6899            for row in 0..n {
6900                let f = ds.values[[row, f_col]];
6901                let g = ds.values[[row, g_col]];
6902                let expected = if f.to_bits() == f_bits && g.to_bits() == g_bits {
6903                    1.0
6904                } else {
6905                    0.0
6906                };
6907                assert!(
6908                    (column[row] - expected).abs() < 1e-12,
6909                    "row {row}: expected {expected}, got {}",
6910                    column[row]
6911                );
6912            }
6913            assert!(
6914                column.iter().any(|&v| v == 1.0),
6915                "each cross cell must be observed in the data"
6916            );
6917        }
6918        // Every non-reference cross cell is present exactly once: all 6 cells
6919        // except f0:g0.
6920        let f_levels = [0.0_f64.to_bits(), 1.0_f64.to_bits(), 2.0_f64.to_bits()];
6921        let g_levels = [0.0_f64.to_bits(), 1.0_f64.to_bits()];
6922        for &fb in &f_levels {
6923            for &gb in &g_levels {
6924                if fb == f0 && gb == g0 {
6925                    continue;
6926                }
6927                assert!(
6928                    emitted.contains(&(fb, gb)),
6929                    "saturated cross cell must be present"
6930                );
6931            }
6932        }
6933    }
6934}