gam_models/fit_orchestration/materialize/
validation.rs1use super::*;
2use gam_terms::inference::formula_dsl::LinkMode;
3
4pub(crate) fn reject_marginal_slope_controls_for_transformation_normal(
5 config: &FitConfig,
6) -> Result<(), WorkflowError> {
7 let family_requests_marginal_slope = config.family.as_deref().is_some_and(|family| {
8 let canonical = family.to_ascii_lowercase().replace('_', "-");
9 canonical == "bernoulli-marginal-slope" || canonical == "binary-marginal-slope"
10 });
11 if family_requests_marginal_slope
12 || config.logslope_formula.is_some()
13 || config.z_column.is_some()
14 || config.ctn_stage1.is_some()
15 {
16 return Err(WorkflowError::InvalidConfig {
17 reason: "transformation_normal cannot be combined with marginal-slope family controls"
18 .to_string(),
19 });
20 }
21 Ok(())
22}
23
24pub(crate) fn reject_survival_only_terms_for_nonsurvival(
37 parsed: &ParsedFormula,
38) -> Result<(), WorkflowError> {
39 if parsed.timewiggle.is_some() {
40 return Err(WorkflowError::InvalidConfig {
41 reason: "timewiggle(...) is only supported in the main survival formula \
42 (a formula with a Surv(...) response); it is meaningless for a \
43 non-survival response and would otherwise be silently ignored"
44 .to_string(),
45 });
46 }
47 if parsed.survivalspec.is_some() {
48 return Err(WorkflowError::InvalidConfig {
49 reason: "survmodel(...) is only supported in the main survival formula \
50 (a formula with a Surv(...) response); it is meaningless for a \
51 non-survival response and would otherwise be silently ignored"
52 .to_string(),
53 });
54 }
55 Ok(())
56}
57
58pub(super) fn reject_explicit_linkwiggle_for_nonbinomial(
72 parsed: &ParsedFormula,
73 family: &LikelihoodSpec,
74) -> Result<(), WorkflowError> {
75 if parsed.linkwiggle.is_some() && !family.is_binomial() {
76 return Err(WorkflowError::InvalidConfig {
77 reason: "linkwiggle(...) corrects the link function of a binomial mean model \
78 and is only supported for a binomial response; it is meaningless for \
79 the resolved non-binomial family and would otherwise be silently ignored"
80 .to_string(),
81 });
82 }
83 Ok(())
84}
85
86pub(super) fn effective_link_choice_for_materialize(
87 parsed: &ParsedFormula,
88 config: &FitConfig,
89) -> Result<Option<LinkChoice>, WorkflowError> {
90 if let Some(linkspec) = parsed.linkspec.as_ref() {
91 if linkspec.mixture_rho.is_some()
92 || linkspec.sas_init.is_some()
93 || linkspec.beta_logistic_init.is_some()
94 {
95 return Err(WorkflowError::InvalidConfig {
96 reason: "link(...) initialization options are not supported by the materialized fit path; pass only link(type=...) in the formula"
97 .to_string(),
98 });
99 }
100 return parse_link_choice(Some(&linkspec.link), false).map_err(WorkflowError::from);
101 }
102 parse_link_choice(config.link.as_deref(), config.flexible_link).map_err(WorkflowError::from)
103}
104
105pub(super) fn reject_flexible_link_for_nonbinomial(
123 link_choice: Option<&LinkChoice>,
124 family: &LikelihoodSpec,
125) -> Result<(), WorkflowError> {
126 let requested_flexible =
127 link_choice.is_some_and(|choice| matches!(choice.mode, LinkMode::Flexible));
128 if requested_flexible && !family.is_binomial() {
129 return Err(WorkflowError::InvalidConfig {
130 reason: format!(
131 "flexible(...) links (the jointly-fit anchored spline link offset) are \
132 implemented only for a binomial response; the resolved family is {} (a \
133 non-binomial family), for which the link offset has no solver and would \
134 otherwise be silently discarded. Use the plain base link, or fit a binomial \
135 response.",
136 family.pretty_name()
137 ),
138 });
139 }
140 Ok(())
141}
142
143pub fn is_binary_response(y: ArrayView1<'_, f64>) -> bool {
145 if y.is_empty() {
146 return false;
147 }
148 y.iter()
149 .all(|v| (*v - 0.0).abs() < 1e-12 || (*v - 1.0).abs() < 1e-12)
150}
151
152pub(super) fn check_smooth_capacity(
167 spec: &gam_terms::smooth::TermCollectionSpec,
168 n_rows: usize,
169 response_name: &str,
170) -> Result<(), WorkflowError> {
171 let mut required: usize = 2;
173 let mut per_term: Vec<(String, usize)> = Vec::new();
174 for term in &spec.smooth_terms {
175 let need = term.basis.min_sample_rows();
176 required = required.saturating_add(need);
177 per_term.push((term.name.clone(), need));
178 }
179 if per_term.is_empty() || n_rows >= required {
180 return Ok(());
181 }
182 let breakdown = per_term
183 .iter()
184 .map(|(name, k)| format!("{name}≥{k}"))
185 .collect::<Vec<_>>()
186 .join(", ");
187 Err(WorkflowError::InvalidConfig {
188 reason: format!(
189 "not enough observations to fit the requested formula: dataset has n={n_rows} \
190 rows but the smooth terms on response '{response_name}' need at least \
191 {required} rows total ({breakdown}, plus intercept + smoothing-parameter dof) \
192 before REML estimation is well-posed. \
193 Fix: add more training rows, replace `s(x)` with a linear term, or pass a \
194 smaller basis via `s(x, k=3)`."
195 ),
196 })
197}