1use std::collections::BTreeMap;
2
3use pest::Parser;
4use pest::iterators::Pair;
5use pest_derive::Parser;
6
7use crate::smooth::BoundedCoefficientPriorSpec;
8use gam_problem::types::{
9 InverseLink, LikelihoodSpec, LinkComponent, LinkFunction, StandardLink, WigglePenaltyConfig,
10};
11
12#[derive(Parser)]
13#[grammar_inline = r#"
14WHITESPACE = _{ " " | "\t" | NEWLINE }
15
16top_function_call = { SOI ~ function_call ~ EOI }
17top_expr = { SOI ~ expr ~ EOI }
18formula = { SOI ~ expr ~ "~" ~ rhs ~ EOI }
19rhs = { term ~ ("+" ~ term)* }
20term = { expr }
21
22expr = { sum }
23sum = { product ~ (add_op ~ product)* }
24add_op = { "+" | "-" }
25product = { interact ~ (mul_op ~ interact)* }
26mul_op = { "*" | "/" }
27interact = { power ~ (interact_op ~ power)* }
28interact_op = { ":" }
29power = { unary ~ (pow_op ~ unary)* }
30pow_op = { "^" }
31unary = { unary_op* ~ primary }
32unary_op = _{ "+" | "-" }
33
34primary = { function_call | list_lit | tuple_lit | ident | number | string_lit | "(" ~ expr ~ ")" }
35list_lit = @{ "[" ~ (!"]" ~ ANY)* ~ "]" }
36tuple_lit = @{ "(" ~ (!("," | ")") ~ ANY)+ ~ "," ~ (!")" ~ ANY)* ~ ")" }
37function_call = { ident ~ "(" ~ arg_list? ~ ")" }
38arg_list = { arg ~ ("," ~ arg)* }
39arg = { named_arg | expr }
40named_arg = { ident ~ "=" ~ expr }
41
42ident = @{ ident_start ~ ident_continue* }
43ident_start = _{ ASCII_ALPHA | "_" }
44ident_continue = _{ ASCII_ALPHANUMERIC | "_" | "." }
45
46number = @{
47 "-"?
48 ~ (ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT*)? | "." ~ ASCII_DIGIT+)
49 ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
50}
51
52string_lit = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" | "'" ~ (!"'" ~ ANY)* ~ "'" }
53"#]
54struct FormulaParser;
55
56#[derive(Clone, Debug, PartialEq, Eq)]
57pub struct FormulaDslParse {
58 pub response_expr: String,
59 pub rhs_terms: Vec<String>,
60}
61
62#[derive(Clone, Debug, PartialEq, Eq)]
63pub enum CallArgSpec {
64 Positional(String),
65 Named { key: String, value: String },
66}
67
68#[derive(Clone, Debug, PartialEq, Eq)]
69pub struct FunctionCallSpec {
70 pub name: String,
71 pub args: Vec<CallArgSpec>,
72}
73
74#[derive(Clone, Debug, PartialEq, Eq)]
85pub enum FormulaDslError {
86 ParseError { reason: String },
90 UnknownIdentifier { reason: String },
94 InvalidArgument { reason: String },
97 IncompatibleTerm { reason: String },
101 MalformedConfig { reason: String },
105}
106
107gam_linalg::impl_reason_error_boilerplate! {
108 FormulaDslError {
109 ParseError,
110 UnknownIdentifier,
111 InvalidArgument,
112 IncompatibleTerm,
113 MalformedConfig,
114 }
115}
116
117impl From<String> for FormulaDslError {
123 fn from(reason: String) -> Self {
124 FormulaDslError::ParseError { reason }
125 }
126}
127
128pub fn parse_formula_dsl(formula: &str) -> Result<FormulaDslParse, String> {
129 validate_balanced_delimiters(formula, "invalid formula syntax")?;
130 let mut parsed =
131 FormulaParser::parse(Rule::formula, formula).map_err(|e| FormulaDslError::ParseError {
132 reason: format!("invalid formula syntax: {e}"),
133 })?;
134 let formula_pair = parsed.next().ok_or_else(|| FormulaDslError::ParseError {
135 reason: "invalid formula syntax: empty parse".to_string(),
136 })?;
137
138 let mut response_expr: Option<String> = None;
139 let mut rhs_terms: Option<Vec<String>> = None;
140
141 for part in formula_pair.into_inner() {
142 match part.as_rule() {
143 Rule::expr if response_expr.is_none() => {
144 response_expr = Some(part.as_str().trim().to_string());
145 }
146 Rule::rhs => {
147 rhs_terms = Some(extract_rhs_terms(part)?);
148 }
149 _ => {}
150 }
151 }
152
153 let response_expr = response_expr.ok_or_else(|| FormulaDslError::ParseError {
154 reason: "invalid formula: missing response expression".to_string(),
155 })?;
156 let rhs_terms = rhs_terms.ok_or_else(|| FormulaDslError::ParseError {
157 reason: "invalid formula: missing RHS terms".to_string(),
158 })?;
159 if rhs_terms.is_empty() {
160 return Err(FormulaDslError::ParseError {
161 reason: "formula has no usable terms".to_string(),
162 }
163 .into());
164 }
165
166 Ok(FormulaDslParse {
167 response_expr,
168 rhs_terms,
169 })
170}
171
172fn delimiter_balance_error(prefix: &str) -> String {
173 format!("{prefix}: unbalanced parentheses or quotes")
174}
175
176fn validate_balanced_delimiters(input: &str, prefix: &str) -> Result<(), String> {
180 let mut stack = Vec::<char>::new();
181 let mut in_single = false;
182 let mut in_double = false;
183
184 for ch in input.chars() {
185 match ch {
186 '\'' if !in_double => in_single = !in_single,
187 '"' if !in_single => in_double = !in_double,
188 '(' | '[' | '{' if !in_single && !in_double => stack.push(ch),
189 ')' | ']' | '}' if !in_single && !in_double => {
190 let expected = match ch {
191 ')' => '(',
192 ']' => '[',
193 _ => '{',
195 };
196 if stack.pop() != Some(expected) {
197 return Err(FormulaDslError::ParseError {
198 reason: delimiter_balance_error(prefix),
199 }
200 .into());
201 }
202 }
203 _ => {}
204 }
205 }
206
207 if in_single || in_double || !stack.is_empty() {
208 return Err(FormulaDslError::ParseError {
209 reason: delimiter_balance_error(prefix),
210 }
211 .into());
212 }
213 Ok(())
214}
215
216fn extract_rhs_terms(rhs: Pair<'_, Rule>) -> Result<Vec<String>, String> {
217 let mut out = Vec::new();
218 let mut depth = 0_i32;
219 let mut in_single = false;
220 let mut in_double = false;
221 let mut start = 0_usize;
222 let mut last_significant: Option<char> = None;
235 let text = rhs.as_str();
236 let bytes = text.as_bytes();
237 for (idx, &b) in bytes.iter().enumerate() {
238 let ch = b as char;
239 match ch {
240 '\'' if !in_double => in_single = !in_single,
241 '"' if !in_single => in_double = !in_double,
242 '(' | '[' | '{' if !in_single && !in_double => depth += 1,
243 ')' | ']' | '}' if !in_single && !in_double && depth > 0 => depth -= 1,
244 '+' if !in_single
245 && !in_double
246 && depth == 0
247 && !matches!(
248 last_significant,
249 None | Some(':' | '*' | '/' | '^' | '+' | '-')
250 ) =>
251 {
252 let term = text[start..idx].trim();
253 if term.is_empty() {
254 return Err(FormulaDslError::ParseError {
255 reason: "formula RHS contains an empty term".to_string(),
256 }
257 .into());
258 }
259 out.push(term.to_string());
260 start = idx + 1;
261 }
262 _ => {}
263 }
264 if !ch.is_ascii_whitespace() {
265 last_significant = Some(ch);
266 }
267 }
268 if in_single || in_double || depth != 0 {
269 return Err(FormulaDslError::ParseError {
270 reason: "formula RHS has unbalanced quotes or parentheses".to_string(),
271 }
272 .into());
273 }
274 let tail = text[start..].trim();
275 if tail.is_empty() {
276 return Err(FormulaDslError::ParseError {
277 reason: "formula RHS contains an empty term".to_string(),
278 }
279 .into());
280 }
281 out.push(tail.to_string());
282 Ok(out)
283}
284
285type WrAtomList = Vec<String>;
307
308fn expand_wr_term(raw: &str) -> Result<Vec<WrAtomList>, String> {
309 let mut parsed = FormulaParser::parse(Rule::top_expr, raw).map_err(|e| {
310 FormulaDslError::ParseError {
311 reason: format!("invalid term syntax in `{raw}`: {e}"),
312 }
313 .to_string()
314 })?;
315 let top = parsed.next().ok_or_else(|| {
316 FormulaDslError::ParseError {
317 reason: format!("invalid term syntax in `{raw}`: empty parse"),
318 }
319 .to_string()
320 })?;
321 let expr = top
322 .into_inner()
323 .find(|p| p.as_rule() == Rule::expr)
324 .ok_or_else(|| {
325 FormulaDslError::ParseError {
326 reason: format!("invalid term syntax in `{raw}`: missing expr"),
327 }
328 .to_string()
329 })?;
330 let interactions = expand_expr(expr, raw)?;
331 let normalized: Vec<WrAtomList> = interactions
332 .into_iter()
333 .map(normalize_interaction)
334 .collect();
335 let mut seen = std::collections::BTreeSet::<Vec<String>>::new();
338 let mut out = Vec::<WrAtomList>::new();
339 for term in normalized {
340 let key = term.clone();
341 if seen.insert(key) {
342 out.push(term);
343 }
344 }
345 Ok(out)
346}
347
348fn normalize_interaction(mut atoms: WrAtomList) -> WrAtomList {
349 atoms.sort();
350 atoms.dedup();
351 atoms
352}
353
354fn expand_expr(pair: Pair<'_, Rule>, raw: &str) -> Result<Vec<WrAtomList>, String> {
355 match pair.as_rule() {
356 Rule::expr => {
357 let inner = pair.into_inner().next().ok_or_else(|| {
358 FormulaDslError::ParseError {
359 reason: format!("invalid term syntax in `{raw}`: empty expr"),
360 }
361 .to_string()
362 })?;
363 expand_expr(inner, raw)
364 }
365 Rule::sum => {
366 let mut iter = pair.into_inner();
367 let first = iter.next().ok_or_else(|| {
368 FormulaDslError::ParseError {
369 reason: format!("invalid term syntax in `{raw}`: empty sum"),
370 }
371 .to_string()
372 })?;
373 let mut acc = expand_expr(first, raw)?;
374 while let Some(op) = iter.next() {
375 if op.as_rule() != Rule::add_op {
376 return Err(FormulaDslError::ParseError {
377 reason: format!(
378 "invalid term syntax in `{raw}`: expected add operator, got `{:?}`",
379 op.as_rule()
380 ),
381 }
382 .into());
383 }
384 let op_str = op.as_str().trim();
385 let operand = iter.next().ok_or_else(|| {
386 FormulaDslError::ParseError {
387 reason: format!("invalid term syntax in `{raw}`: dangling `{op_str}`"),
388 }
389 .to_string()
390 })?;
391 if op_str == "-" {
392 return Err(FormulaDslError::IncompatibleTerm {
393 reason: format!(
394 "binary `-` is not supported inside a formula term in `{raw}` \
395 (use multiple `+` terms or drop the unwanted predictor explicitly)"
396 ),
397 }
398 .into());
399 }
400 let mut rhs = expand_expr(operand, raw)?;
401 acc.append(&mut rhs);
402 }
403 Ok(acc)
404 }
405 Rule::product => {
406 let mut iter = pair.into_inner();
407 let first = iter.next().ok_or_else(|| {
408 FormulaDslError::ParseError {
409 reason: format!("invalid term syntax in `{raw}`: empty product"),
410 }
411 .to_string()
412 })?;
413 let mut acc = expand_expr(first, raw)?;
414 while let Some(op) = iter.next() {
415 if op.as_rule() != Rule::mul_op {
416 return Err(FormulaDslError::ParseError {
417 reason: format!(
418 "invalid term syntax in `{raw}`: expected `*` or `/`, got `{:?}`",
419 op.as_rule()
420 ),
421 }
422 .into());
423 }
424 let op_str = op.as_str().trim();
425 let operand = iter.next().ok_or_else(|| {
426 FormulaDslError::ParseError {
427 reason: format!("invalid term syntax in `{raw}`: dangling `{op_str}`"),
428 }
429 .to_string()
430 })?;
431 let rhs = expand_expr(operand, raw)?;
432 acc = match op_str {
433 "*" => wr_cross(acc, rhs),
434 "/" => wr_nest(acc, rhs),
435 other => {
436 return Err(FormulaDslError::ParseError {
437 reason: format!(
438 "invalid term syntax in `{raw}`: unrecognized mul operator `{other}`"
439 ),
440 }
441 .into());
442 }
443 };
444 }
445 Ok(acc)
446 }
447 Rule::interact => {
448 let mut iter = pair.into_inner();
449 let first = iter.next().ok_or_else(|| {
450 FormulaDslError::ParseError {
451 reason: format!("invalid term syntax in `{raw}`: empty interact"),
452 }
453 .to_string()
454 })?;
455 let mut acc = expand_expr(first, raw)?;
456 while let Some(op) = iter.next() {
457 if op.as_rule() != Rule::interact_op {
458 return Err(FormulaDslError::ParseError {
459 reason: format!(
460 "invalid term syntax in `{raw}`: expected `:`, got `{:?}`",
461 op.as_rule()
462 ),
463 }
464 .into());
465 }
466 let operand = iter.next().ok_or_else(|| {
467 FormulaDslError::ParseError {
468 reason: format!("invalid term syntax in `{raw}`: dangling `:`"),
469 }
470 .to_string()
471 })?;
472 let rhs = expand_expr(operand, raw)?;
473 acc = wr_interact(acc, rhs, raw)?;
474 }
475 Ok(acc)
476 }
477 Rule::power => {
478 let mut iter = pair.into_inner();
479 let first = iter.next().ok_or_else(|| {
480 FormulaDslError::ParseError {
481 reason: format!("invalid term syntax in `{raw}`: empty power"),
482 }
483 .to_string()
484 })?;
485 let base = expand_expr(first, raw)?;
486 let Some(op) = iter.next() else {
487 return Ok(base);
488 };
489 if op.as_rule() != Rule::pow_op {
490 return Err(FormulaDslError::ParseError {
491 reason: format!(
492 "invalid term syntax in `{raw}`: expected `^`, got `{:?}`",
493 op.as_rule()
494 ),
495 }
496 .into());
497 }
498 let exponent_pair = iter.next().ok_or_else(|| {
499 FormulaDslError::ParseError {
500 reason: format!("invalid term syntax in `{raw}`: dangling `^`"),
501 }
502 .to_string()
503 })?;
504 let exp_text = exponent_pair.as_str().trim();
505 let n: usize = exp_text.parse().map_err(|_| {
506 FormulaDslError::ParseError {
507 reason: format!(
508 "invalid term syntax in `{raw}`: `^` exponent must be a positive integer, got `{exp_text}`"
509 ),
510 }
511 .to_string()
512 })?;
513 if n == 0 {
514 return Err(FormulaDslError::ParseError {
515 reason: format!(
516 "invalid term syntax in `{raw}`: `^0` is not a meaningful formula expansion"
517 ),
518 }
519 .into());
520 }
521 if let Some(extra_op) = iter.next() {
522 let extra = extra_op.as_str().trim();
523 return Err(FormulaDslError::ParseError {
524 reason: format!(
525 "invalid term syntax in `{raw}`: chained `^` operators are not supported; \
526 use one positive integer exponent, got another `{extra}`"
527 ),
528 }
529 .into());
530 }
531 Ok(wr_power(base, n))
532 }
533 Rule::unary => {
534 let unary_start = pair.as_span().start();
535 for inner in pair.into_inner() {
536 if inner.as_rule() == Rule::primary {
537 let prefix_len = inner.as_span().start().saturating_sub(unary_start);
538 if prefix_len > 0 {
539 let prefix = &raw[unary_start..inner.as_span().start()];
540 if prefix.chars().any(|ch| matches!(ch, '+' | '-')) {
541 return Err(FormulaDslError::IncompatibleTerm {
542 reason: format!(
543 "unary `+`/`-` is not supported inside a formula term in `{raw}`"
544 ),
545 }
546 .into());
547 }
548 }
549 return expand_expr(inner, raw);
550 }
551 }
552 Err(FormulaDslError::ParseError {
553 reason: format!("invalid term syntax in `{raw}`: empty unary"),
554 }
555 .into())
556 }
557 Rule::primary => {
558 let span = pair.as_str().trim().to_string();
559 let inner = pair.into_inner().next();
560 match inner {
561 Some(child) if child.as_rule() == Rule::expr => expand_expr(child, raw),
562 Some(child) => Ok(vec![vec![child.as_str().trim().to_string()]]),
563 None => Ok(vec![vec![span]]),
564 }
565 }
566 _ => Err(FormulaDslError::ParseError {
567 reason: format!(
568 "invalid term syntax in `{raw}`: unexpected node `{:?}`",
569 pair.as_rule()
570 ),
571 }
572 .into()),
573 }
574}
575
576fn wr_cross(left: Vec<WrAtomList>, right: Vec<WrAtomList>) -> Vec<WrAtomList> {
577 let mut out = Vec::with_capacity(left.len() + right.len() + left.len() * right.len());
579 out.extend(left.iter().cloned());
580 out.extend(right.iter().cloned());
581 for l in &left {
582 for r in &right {
583 let mut merged: WrAtomList = l.iter().cloned().chain(r.iter().cloned()).collect();
584 merged.sort();
585 merged.dedup();
586 out.push(merged);
587 }
588 }
589 out
590}
591
592fn wr_nest(left: Vec<WrAtomList>, right: Vec<WrAtomList>) -> Vec<WrAtomList> {
593 let mut out = Vec::with_capacity(left.len() + left.len() * right.len());
595 out.extend(left.iter().cloned());
596 for l in &left {
597 for r in &right {
598 let mut merged: WrAtomList = l.iter().cloned().chain(r.iter().cloned()).collect();
599 merged.sort();
600 merged.dedup();
601 out.push(merged);
602 }
603 }
604 out
605}
606
607fn wr_interact(
608 left: Vec<WrAtomList>,
609 right: Vec<WrAtomList>,
610 raw: &str,
611) -> Result<Vec<WrAtomList>, String> {
612 let mut out = Vec::with_capacity(left.len() * right.len());
614 for l in &left {
615 for r in &right {
616 let combined: WrAtomList = l.iter().cloned().chain(r.iter().cloned()).collect();
617 for atom in &combined {
621 if atom.contains('(') {
622 return Err(FormulaDslError::IncompatibleTerm {
623 reason: format!(
624 "interaction operator `:` with function-call atom is not supported in `{raw}`. \
625 Use te(...) for smooth interactions or group()/factor() with a separate \
626 interaction strategy for categorical effects."
627 ),
628 }
629 .into());
630 }
631 }
632 let mut merged = combined;
633 merged.sort();
634 merged.dedup();
635 out.push(merged);
636 }
637 }
638 Ok(out)
639}
640
641fn wr_power(base: Vec<WrAtomList>, n: usize) -> Vec<WrAtomList> {
642 if base.is_empty() {
647 return Vec::new();
648 }
649 let m = base.len();
650 let mut out = Vec::<WrAtomList>::new();
651 let max_size = n.min(m);
652 for size in 1..=max_size {
653 let mut indices: Vec<usize> = (0..size).collect();
655 loop {
656 let mut merged = WrAtomList::new();
657 for &i in &indices {
658 merged.extend(base[i].iter().cloned());
659 }
660 merged.sort();
661 merged.dedup();
662 out.push(merged);
663 let mut k = size;
665 while k > 0 {
666 k -= 1;
667 if indices[k] != k + m - size {
668 indices[k] += 1;
669 for j in (k + 1)..size {
670 indices[j] = indices[j - 1] + 1;
671 }
672 break;
673 }
674 if k == 0 {
675 k = usize::MAX;
676 break;
677 }
678 }
679 if k == usize::MAX {
680 break;
681 }
682 }
683 }
684 out
685}
686
687fn is_exact_ident(raw: &str) -> bool {
688 let mut chars = raw.chars();
689 let Some(first) = chars.next() else {
690 return false;
691 };
692 if !first.is_ascii_alphabetic() && first != '_' {
693 return false;
694 }
695 chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '.')
696}
697
698pub fn parse_function_call(input: &str) -> Result<FunctionCallSpec, String> {
699 validate_balanced_delimiters(input, "invalid function call syntax")?;
700 let mut parsed = FormulaParser::parse(Rule::top_function_call, input).map_err(|e| {
701 FormulaDslError::ParseError {
702 reason: format!("invalid function call syntax: {e}"),
703 }
704 })?;
705 let top = parsed.next().ok_or_else(|| FormulaDslError::ParseError {
706 reason: "invalid function call syntax: empty parse".to_string(),
707 })?;
708 let call = top
709 .into_inner()
710 .find(|p| p.as_rule() == Rule::function_call)
711 .ok_or_else(|| FormulaDslError::ParseError {
712 reason: "invalid function call syntax: missing call".to_string(),
713 })?;
714 parse_call_pair(call)
715}
716
717fn parse_call_pair(call: Pair<'_, Rule>) -> Result<FunctionCallSpec, String> {
718 let mut name: Option<String> = None;
719 let mut args = Vec::<CallArgSpec>::new();
720 for part in call.into_inner() {
721 match part.as_rule() {
722 Rule::ident => {
723 if name.is_none() {
724 name = Some(part.as_str().trim().to_string());
725 }
726 }
727 Rule::arg_list => {
728 for a in part.into_inner() {
729 if a.as_rule() != Rule::arg {
730 continue;
731 }
732 let mut a_inner = a.into_inner();
733 let Some(first) = a_inner.next() else {
734 continue;
735 };
736 match first.as_rule() {
737 Rule::named_arg => {
738 let mut ni = first.into_inner();
739 let key = ni
740 .next()
741 .ok_or_else(|| FormulaDslError::ParseError {
742 reason: "invalid named argument key".to_string(),
743 })?
744 .as_str()
745 .trim()
746 .to_ascii_lowercase();
747 let value = ni
748 .next()
749 .ok_or_else(|| FormulaDslError::ParseError {
750 reason: "invalid named argument value".to_string(),
751 })?
752 .as_str()
753 .trim()
754 .to_string();
755 args.push(CallArgSpec::Named { key, value });
756 }
757 Rule::expr => {
758 args.push(CallArgSpec::Positional(first.as_str().trim().to_string()));
759 }
760 _ => {}
761 }
762 }
763 }
764 _ => {}
765 }
766 }
767 let name = name.ok_or_else(|| FormulaDslError::ParseError {
768 reason: "invalid function call: missing name".to_string(),
769 })?;
770 Ok(FunctionCallSpec { name, args })
771}
772
773#[cfg(test)]
774mod tests {
775 use super::{
776 CallArgSpec, ParsedTerm, parse_formula, parse_formula_dsl, parse_function_call,
777 parse_linkwiggle_formulaspec, parsed_term_column_names, parsed_terms_reference_column,
778 validate_marginal_slope_z_column_exclusion,
779 };
780 use std::collections::{BTreeMap, BTreeSet};
781
782 #[test]
783 fn parsed_term_column_names_includes_by_smooth_grouping_variable() {
784 let parsed =
792 parse_formula("y ~ s(x, by=g) + z + a:b").expect("formula with a by= smooth parses");
793 let mut cols = BTreeSet::<String>::new();
794 parsed_term_column_names(&parsed.terms, &mut cols);
795 for expected in ["x", "g", "z", "a", "b"] {
796 assert!(
797 cols.contains(expected),
798 "parsed_term_column_names dropped '{expected}'; got {cols:?}"
799 );
800 }
801 assert!(
803 !cols.contains("y"),
804 "response leaked into term columns: {cols:?}"
805 );
806 }
807
808 #[test]
809 fn linkwiggle_parser_does_not_bake_in_cubic_only_restriction() {
810 for deg in [2usize, 4, 5, 10] {
820 let mut options = BTreeMap::new();
821 options.insert("degree".to_string(), deg.to_string());
822 options.insert("internal_knots".to_string(), "3".to_string());
823 let raw = format!("timewiggle(degree={deg}, internal_knots=3)");
824 let spec = parse_linkwiggle_formulaspec(&options, &raw)
825 .expect("non-cubic wiggle degree must parse at the shared layer");
826 assert_eq!(
827 spec.degree, deg,
828 "parser must carry the requested degree through verbatim"
829 );
830 }
831
832 let mut zero = BTreeMap::new();
835 zero.insert("degree".to_string(), "0".to_string());
836 zero.insert("internal_knots".to_string(), "3".to_string());
837 let err = parse_linkwiggle_formulaspec(&zero, "linkwiggle(degree=0, internal_knots=3)")
838 .expect_err("degree=0 must be rejected");
839 assert!(
840 err.contains("degree >= 1"),
841 "error should state the positive-degree lower bound, got: {err}"
842 );
843 }
844
845 #[test]
846 fn parses_nested_formula_terms() {
847 let parsed =
848 parse_formula_dsl("log(y) ~ x1 + s(log(x2 + 1), bs=\"tps\", k=10) + te(x3, x4)")
849 .expect("parse");
850 assert_eq!(parsed.response_expr, "log(y)");
851 assert_eq!(parsed.rhs_terms.len(), 3);
852 assert_eq!(parsed.rhs_terms[0], "x1");
853 assert_eq!(parsed.rhs_terms[1], "s(log(x2 + 1), bs=\"tps\", k=10)");
854 assert_eq!(parsed.rhs_terms[2], "te(x3, x4)");
855 }
856
857 #[test]
858 fn parses_cyclic_formula_aliases() {
859 let parsed = parse_formula("y ~ cyclic(theta, period_start=0, period_end=6.283)")
860 .expect("parse cyclic formula");
861 match &parsed.terms[0] {
862 super::ParsedTerm::Smooth { vars, options, .. } => {
863 assert_eq!(vars, &vec!["theta".to_string()]);
864 assert_eq!(options.get("type").map(String::as_str), Some("cyclic"));
865 assert_eq!(options.get("period_start").map(String::as_str), Some("0"));
866 }
867 other => panic!("expected cyclic smooth term, got {other:?}"),
868 }
869 }
870
871 #[test]
872 fn sphere_aliases_all_dispatch_to_intrinsic_s2_basis() {
873 for alias in ["sphere", "sos", "spherical", "s2"] {
880 let parsed = parse_formula(&format!("y ~ {alias}(lat, lon)"))
881 .unwrap_or_else(|e| panic!("parse {alias}: {e}"));
882 match &parsed.terms[0] {
883 super::ParsedTerm::Smooth { vars, options, .. } => {
884 assert_eq!(
885 vars,
886 &vec!["lat".to_string(), "lon".to_string()],
887 "{alias} should keep (lat, lon) as its variables"
888 );
889 assert_eq!(
890 options.get("type").map(String::as_str),
891 Some("sphere"),
892 "{alias} must dispatch to the intrinsic sphere basis (type=sphere)"
893 );
894 }
895 other => panic!("expected sphere smooth term for {alias}, got {other:?}"),
896 }
897 }
898 }
899
900 #[test]
901 fn parses_function_callwithnamed_and_positional_args() {
902 let call = parse_function_call("s(log(x + 1), type=\"duchon\", centers=12)").expect("call");
903 assert_eq!(call.name, "s");
904 assert_eq!(call.args.len(), 3);
905 assert_eq!(
906 call.args[0],
907 CallArgSpec::Positional("log(x + 1)".to_string())
908 );
909 assert_eq!(
910 call.args[1],
911 CallArgSpec::Named {
912 key: "type".to_string(),
913 value: "\"duchon\"".to_string()
914 }
915 );
916 }
917
918 #[test]
919 fn parses_tensor_boundary_list_options() {
920 let call = parse_function_call(
921 "te(day_of_week, hour, boundary=['periodic', 'periodic'], period=[7, 24])",
922 )
923 .expect("call");
924 assert_eq!(call.name, "te");
925 assert_eq!(call.args.len(), 4);
926 assert_eq!(
927 call.args[2],
928 CallArgSpec::Named {
929 key: "boundary".to_string(),
930 value: "['periodic', 'periodic']".to_string(),
931 }
932 );
933 }
934
935 #[test]
936 fn parse_formula_dsl_reports_unbalanced_parentheses() {
937 let err = parse_formula_dsl("y ~ s(x, k=10").expect_err("expected parse failure");
938 assert!(err.contains("unbalanced parentheses"));
939 }
940
941 #[test]
942 fn parse_function_call_reports_unbalanced_parentheses() {
943 let err = parse_function_call("s(x, k=10").expect_err("expected parse failure");
944 assert!(err.contains("unbalanced parentheses"));
945 }
946
947 #[test]
948 fn parse_formula_accepts_tuple_smooth_options() {
949 let parsed = parse_formula("z ~ te(x, y, k=(20, 20))")
950 .expect("tuple-valued smooth option should parse");
951 assert_eq!(parsed.terms.len(), 1);
952
953 let dsl = parse_formula_dsl("z ~ te(x, y, k=(20, 20))")
954 .expect("tuple-valued smooth option should parse in the DSL layer");
955 assert_eq!(dsl.rhs_terms, vec!["te(x, y, k=(20, 20))"]);
956
957 let call = parse_function_call("te(x, y, k=(20, 20))")
958 .expect("tuple-valued smooth option should parse as a function call");
959 assert_eq!(
960 call.args[2],
961 CallArgSpec::Named {
962 key: "k".to_string(),
963 value: "(20, 20)".to_string(),
964 }
965 );
966 }
967
968 #[test]
969 fn parse_formula_rejects_unsupported_top_level_rhs_expressions() {
970 for formula in ["y ~ x - z", "y ~ -x", "y ~ (x)", "y ~ x - 1"] {
979 let err = parse_formula(formula).expect_err("expected formula parse failure");
980 assert!(err.to_string().contains("unsupported top-level RHS term"));
981 }
982 }
983
984 #[test]
990 fn parse_formula_supports_wr_slash_nesting() {
991 let parsed = parse_formula("y ~ x / z").expect("`/` is supported as WR nesting");
992 assert_eq!(parsed.response, "y");
993 assert_eq!(parsed.terms.len(), 2);
994 let names: Vec<String> = parsed
995 .terms
996 .iter()
997 .map(|t| match t {
998 ParsedTerm::Linear { name, .. } => format!("Linear({name})"),
999 ParsedTerm::Interaction { vars } => format!("Interaction({})", vars.join(":")),
1000 other => format!("Other({other:?})"),
1001 })
1002 .collect();
1003 assert_eq!(
1004 names,
1005 vec!["Linear(x)".to_string(), "Interaction(x:z)".to_string()]
1006 );
1007 }
1008
1009 #[test]
1014 fn parse_formula_supports_wr_star_crossing() {
1015 let parsed = parse_formula("y ~ x * z").expect("`*` is supported as WR crossing");
1016 assert_eq!(parsed.response, "y");
1017 assert_eq!(parsed.terms.len(), 3);
1018 let names: Vec<String> = parsed
1019 .terms
1020 .iter()
1021 .map(|t| match t {
1022 ParsedTerm::Linear { name, .. } => format!("Linear({name})"),
1023 ParsedTerm::Interaction { vars } => format!("Interaction({})", vars.join(":")),
1024 other => format!("Other({other:?})"),
1025 })
1026 .collect();
1027 assert_eq!(
1028 names,
1029 vec![
1030 "Linear(x)".to_string(),
1031 "Linear(z)".to_string(),
1032 "Interaction(x:z)".to_string(),
1033 ]
1034 );
1035 }
1036
1037 #[test]
1038 fn parse_formula_rejects_unary_signs_inside_wr_expansion() {
1039 for formula in ["y ~ x:-z", "y ~ a*-b", "y ~ x/-z", "y ~ x:+z"] {
1040 let err = parse_formula(formula)
1041 .expect_err("WR expansion must not silently drop unary signs");
1042 let msg = err.to_string();
1043 assert!(
1044 msg.contains("unary `+`/`-` is not supported"),
1045 "unexpected error for {formula}: {msg}"
1046 );
1047 }
1048 }
1049
1050 #[test]
1051 fn parse_formula_supports_wr_power_crossing() {
1052 let parsed = parse_formula("y ~ (x + z)^2").expect("`^` is supported as WR power");
1053 assert_eq!(parsed.response, "y");
1054 assert_eq!(parsed.terms.len(), 3);
1055 let names: Vec<String> = parsed
1056 .terms
1057 .iter()
1058 .map(|t| match t {
1059 ParsedTerm::Linear { name, .. } => format!("Linear({name})"),
1060 ParsedTerm::Interaction { vars } => format!("Interaction({})", vars.join(":")),
1061 other => format!("Other({other:?})"),
1062 })
1063 .collect();
1064 assert_eq!(
1065 names,
1066 vec![
1067 "Linear(x)".to_string(),
1068 "Linear(z)".to_string(),
1069 "Interaction(x:z)".to_string(),
1070 ]
1071 );
1072 }
1073
1074 #[test]
1075 fn parse_formula_rejects_chained_wr_power() {
1076 let err = parse_formula("y ~ (x + z)^2^3")
1077 .expect_err("chained WR powers must not silently drop later exponents");
1078 let msg = err.to_string();
1079 assert!(
1080 msg.contains("chained `^` operators are not supported"),
1081 "error should explain that chained powers are rejected, got: {msg}"
1082 );
1083 }
1084
1085 #[test]
1086 fn parsed_terms_reference_column_sees_the_by_smooth_variable() {
1087 let parsed = parse_formula("y ~ s(x, by=g)").expect("parse by-smooth");
1093 assert!(
1094 parsed_terms_reference_column(&parsed.terms, "g"),
1095 "s(x, by=g) references column g via options[\"by\"]"
1096 );
1097 assert!(parsed_terms_reference_column(&parsed.terms, "x"));
1098 assert!(!parsed_terms_reference_column(&parsed.terms, "absent"));
1099 }
1100
1101 #[test]
1102 fn marginal_slope_z_column_validator_detects_linear_and_smooth_reuse() {
1103 let main = parse_formula("y ~ x + z").expect("parse main");
1104 let logslope = parse_formula("y ~ s(z, type=duchon, centers=6)").expect("parse logslope");
1105
1106 assert!(parsed_terms_reference_column(&main.terms, "z"));
1107 assert!(parsed_terms_reference_column(&logslope.terms, "z"));
1108
1109 let err = validate_marginal_slope_z_column_exclusion(
1110 &main,
1111 &parse_formula("y ~ 1").expect("parse clean logslope"),
1112 "z",
1113 "bernoulli marginal-slope",
1114 "--logslope-formula",
1115 )
1116 .expect_err("main formula should be rejected");
1117 assert!(err.contains("cannot also appear in the main formula"));
1118
1119 let err = validate_marginal_slope_z_column_exclusion(
1120 &parse_formula("y ~ x").expect("parse clean main"),
1121 &logslope,
1122 "z",
1123 "bernoulli marginal-slope",
1124 "--logslope-formula",
1125 )
1126 .expect_err("logslope formula should be rejected");
1127 assert!(err.contains("cannot also appear in --logslope-formula"));
1128 }
1129
1130 #[test]
1131 fn logslope_surface_declarations_are_additive() {
1132 let parsed = parse_formula("y ~ s(pc1) + logslope(z2, s(pc2)) + logslope(z3, x3)")
1133 .expect("parse additive logslope surfaces");
1134 assert_eq!(parsed.terms.len(), 1);
1135 assert_eq!(parsed.logslope_surfaces.len(), 2);
1136 assert_eq!(parsed.logslope_surfaces[0].z_column, "z2");
1137 assert_eq!(parsed.logslope_surfaces[0].terms.len(), 1);
1138 assert_eq!(parsed.logslope_surfaces[1].z_column, "z3");
1139 assert_eq!(parsed.logslope_surfaces[1].terms.len(), 1);
1140 }
1141
1142 #[test]
1143 fn marginal_slope_z_column_validator_reserves_all_surface_z_columns() {
1144 let main = parse_formula("y ~ x").expect("parse main");
1145 let logslope = parse_formula("y ~ s(pc1) + logslope(z2, s(z3)) + logslope(z3, x)")
1146 .expect("parse logslope surfaces");
1147 let err = validate_marginal_slope_z_column_exclusion(
1148 &main,
1149 &logslope,
1150 "z1",
1151 "bernoulli marginal-slope",
1152 "--logslope-formula",
1153 )
1154 .expect_err("surface formula should reject another reserved z coordinate");
1155 assert!(err.contains("reserves z column 'z3'"));
1156 }
1157}
1158
1159#[derive(Clone, Debug)]
1164pub struct LinkWiggleFormulaSpec {
1165 pub degree: usize,
1166 pub num_internal_knots: usize,
1167 pub penalty_orders: Vec<usize>,
1168 pub double_penalty: bool,
1169}
1170
1171pub fn default_linkwiggle_formulaspec() -> LinkWiggleFormulaSpec {
1172 let cfg = WigglePenaltyConfig::cubic_triple_operator_default();
1173 LinkWiggleFormulaSpec {
1174 degree: cfg.degree,
1175 num_internal_knots: cfg.num_internal_knots,
1176 penalty_orders: cfg.penalty_orders,
1177 double_penalty: cfg.double_penalty,
1178 }
1179}
1180
1181#[derive(Clone, Debug)]
1182pub struct LinkFormulaSpec {
1183 pub link: String,
1184 pub mixture_rho: Option<String>,
1185 pub sas_init: Option<String>,
1186 pub beta_logistic_init: Option<String>,
1187}
1188
1189#[derive(Clone, Debug)]
1190pub struct SurvivalFormulaSpec {
1191 pub spec: Option<String>,
1192 pub survival_distribution: Option<String>,
1193}
1194
1195#[derive(Clone, Debug)]
1196pub struct ParsedFormula {
1197 pub response: String,
1198 pub terms: Vec<ParsedTerm>,
1199 pub logslope_surfaces: Vec<LogSlopeSurfaceSpec>,
1200 pub linkwiggle: Option<LinkWiggleFormulaSpec>,
1201 pub timewiggle: Option<LinkWiggleFormulaSpec>,
1202 pub linkspec: Option<LinkFormulaSpec>,
1203 pub survivalspec: Option<SurvivalFormulaSpec>,
1204}
1205
1206#[derive(Clone, Debug)]
1207pub struct LogSlopeSurfaceSpec {
1208 pub z_column: String,
1209 pub terms: Vec<ParsedTerm>,
1210}
1211
1212pub fn marginal_slope_logslope_surfaces(
1213 logslope_formula: &ParsedFormula,
1214 default_z_column: &str,
1215) -> Result<Vec<LogSlopeSurfaceSpec>, String> {
1216 let mut surfaces = Vec::new();
1217 if !logslope_formula.terms.is_empty() {
1218 surfaces.push(LogSlopeSurfaceSpec {
1219 z_column: default_z_column.to_string(),
1220 terms: logslope_formula.terms.clone(),
1221 });
1222 }
1223 surfaces.extend(logslope_formula.logslope_surfaces.clone());
1224 if surfaces.is_empty() {
1225 surfaces.push(LogSlopeSurfaceSpec {
1226 z_column: default_z_column.to_string(),
1227 terms: Vec::new(),
1228 });
1229 }
1230 let mut seen = std::collections::BTreeSet::<String>::new();
1231 for surface in &surfaces {
1232 if !seen.insert(surface.z_column.clone()) {
1233 return Err(FormulaDslError::IncompatibleTerm {
1234 reason: format!(
1235 "logslope formula declares z column '{}' more than once; each z coordinate needs exactly one log-slope surface",
1236 surface.z_column
1237 ),
1238 }
1239 .into());
1240 }
1241 }
1242 Ok(surfaces)
1243}
1244
1245#[derive(Clone, Debug)]
1246pub enum ParsedTerm {
1247 Linear {
1248 name: String,
1249 explicit: bool,
1250 coefficient_min: Option<f64>,
1251 coefficient_max: Option<f64>,
1252 },
1253 BoundedLinear {
1254 name: String,
1255 min: f64,
1256 max: f64,
1257 prior: BoundedCoefficientPriorSpec,
1258 },
1259 RandomEffect {
1260 name: String,
1261 },
1262 Smooth {
1263 label: String,
1264 vars: Vec<String>,
1265 kind: SmoothKind,
1266 options: BTreeMap<String, String>,
1267 },
1268 LinkWiggle {
1269 options: BTreeMap<String, String>,
1270 },
1271 TimeWiggle {
1272 options: BTreeMap<String, String>,
1273 },
1274 LinkConfig {
1275 options: BTreeMap<String, String>,
1276 },
1277 SurvivalConfig {
1278 options: BTreeMap<String, String>,
1279 },
1280 LogSlopeSurface {
1281 z_column: String,
1282 terms: Vec<ParsedTerm>,
1283 },
1284 Interaction {
1292 vars: Vec<String>,
1293 },
1294}
1295
1296pub fn parsed_term_column_names(
1305 terms: &[ParsedTerm],
1306 out: &mut std::collections::BTreeSet<String>,
1307) {
1308 for term in terms {
1309 match term {
1310 ParsedTerm::Linear { name, .. }
1311 | ParsedTerm::BoundedLinear { name, .. }
1312 | ParsedTerm::RandomEffect { name } => {
1313 out.insert(name.clone());
1314 }
1315 ParsedTerm::Smooth { vars, options, .. } => {
1316 out.extend(vars.iter().cloned());
1317 if let Some(by) = options.get("by") {
1318 out.insert(by.clone());
1319 }
1320 }
1321 ParsedTerm::Interaction { vars } => {
1322 out.extend(vars.iter().cloned());
1323 }
1324 ParsedTerm::LinkWiggle { .. }
1325 | ParsedTerm::TimeWiggle { .. }
1326 | ParsedTerm::LinkConfig { .. }
1327 | ParsedTerm::SurvivalConfig { .. } => {}
1328 ParsedTerm::LogSlopeSurface { z_column, terms } => {
1329 out.insert(z_column.clone());
1330 parsed_term_column_names(terms, out);
1331 }
1332 }
1333 }
1334}
1335
1336pub fn parsed_terms_reference_column(terms: &[ParsedTerm], column_name: &str) -> bool {
1337 terms.iter().any(|term| match term {
1338 ParsedTerm::Linear { name, .. }
1339 | ParsedTerm::BoundedLinear { name, .. }
1340 | ParsedTerm::RandomEffect { name } => name == column_name,
1341 ParsedTerm::Smooth { vars, options, .. } => {
1342 vars.iter().any(|var| var == column_name)
1343 || options.get("by").is_some_and(|by| by == column_name)
1344 }
1345 ParsedTerm::Interaction { vars } => vars.iter().any(|var| var == column_name),
1346 ParsedTerm::LinkWiggle { .. }
1347 | ParsedTerm::TimeWiggle { .. }
1348 | ParsedTerm::LinkConfig { .. }
1349 | ParsedTerm::SurvivalConfig { .. } => false,
1350 ParsedTerm::LogSlopeSurface { z_column, terms } => {
1351 z_column == column_name || parsed_terms_reference_column(terms, column_name)
1352 }
1353 })
1354}
1355
1356pub fn validate_marginal_slope_z_column_exclusion(
1357 main_formula: &ParsedFormula,
1358 logslope_formula: &ParsedFormula,
1359 z_column: &str,
1360 context: &str,
1361 logslope_label: &str,
1362) -> Result<(), String> {
1363 let surfaces = marginal_slope_logslope_surfaces(logslope_formula, z_column)?;
1364 let mut reserved_z_columns = std::collections::BTreeSet::<&str>::new();
1368 reserved_z_columns.insert(z_column);
1369 reserved_z_columns.extend(surfaces.iter().map(|surface| surface.z_column.as_str()));
1370
1371 for reserved in &reserved_z_columns {
1372 if parsed_terms_reference_column(&main_formula.terms, reserved) {
1373 return Err(FormulaDslError::IncompatibleTerm {
1374 reason: format!(
1375 "{context} reserves z column '{reserved}' as the auxiliary latent score; it cannot also appear in the main formula"
1376 ),
1377 }
1378 .into());
1379 }
1380 }
1381 for reserved in &reserved_z_columns {
1382 if parsed_terms_reference_column(&logslope_formula.terms, reserved) {
1383 return Err(FormulaDslError::IncompatibleTerm {
1384 reason: format!(
1385 "{context} reserves z column '{reserved}' as the auxiliary latent score; it cannot also appear in {logslope_label}"
1386 ),
1387 }
1388 .into());
1389 }
1390 for surface in &surfaces {
1391 if parsed_terms_reference_column(&surface.terms, reserved) {
1392 return Err(FormulaDslError::IncompatibleTerm {
1393 reason: format!(
1394 "{context} reserves z column '{reserved}' as an auxiliary latent score; it cannot also appear in {logslope_label}"
1395 ),
1396 }
1397 .into());
1398 }
1399 }
1400 }
1401 Ok(())
1402}
1403
1404#[derive(Clone, Copy, Debug)]
1405pub enum SmoothKind {
1406 S,
1407 Te,
1408 T2,
1412 Ti,
1418}
1419
1420#[derive(Clone, Copy, Debug)]
1421pub enum LinkMode {
1422 Strict,
1423 Flexible,
1424}
1425
1426#[derive(Clone, Debug)]
1427pub struct LinkChoice {
1428 pub mode: LinkMode,
1429 pub link: LinkFunction,
1430 pub mixture_components: Option<Vec<LinkComponent>>,
1431}
1432
1433pub fn effectivelinkwiggle_formulaspec(
1438 formula_linkwiggle: Option<&LinkWiggleFormulaSpec>,
1439 link_choice: Option<&LinkChoice>,
1440) -> Option<LinkWiggleFormulaSpec> {
1441 formula_linkwiggle.cloned().or_else(|| {
1442 link_choice.and_then(|choice| {
1443 if matches!(choice.mode, LinkMode::Flexible) {
1444 Some(default_linkwiggle_formulaspec())
1445 } else {
1446 None
1447 }
1448 })
1449 })
1450}
1451
1452pub const fn linkname_supports_joint_wiggle(link: LinkFunction) -> bool {
1453 !matches!(link, LinkFunction::Sas | LinkFunction::BetaLogistic)
1454}
1455
1456pub const fn linkchoice_supports_joint_wiggle(choice: &LinkChoice) -> bool {
1457 match &choice.mixture_components {
1458 None => linkname_supports_joint_wiggle(choice.link),
1459 Some(_) => false,
1460 }
1461}
1462
1463pub fn require_linkchoice_supports_joint_wiggle(
1464 choice: &LinkChoice,
1465 context: &str,
1466) -> Result<(), String> {
1467 if linkchoice_supports_joint_wiggle(choice) {
1468 Ok(())
1469 } else {
1470 Err(joint_wiggle_unsupported_link_message(context))
1471 }
1472}
1473
1474pub const fn likelihood_spec_supports_joint_wiggle(likelihood: &LikelihoodSpec) -> bool {
1475 inverse_link_supports_joint_wiggle(&likelihood.link)
1476}
1477
1478pub fn require_likelihood_spec_supports_joint_wiggle(
1479 likelihood: &LikelihoodSpec,
1480 context: &str,
1481) -> Result<(), String> {
1482 if likelihood_spec_supports_joint_wiggle(likelihood) {
1483 Ok(())
1484 } else {
1485 Err(joint_wiggle_unsupported_link_message(context))
1486 }
1487}
1488
1489pub const fn inverse_link_supports_joint_wiggle(link: &InverseLink) -> bool {
1490 matches!(
1491 link,
1492 InverseLink::Standard(StandardLink::Identity)
1493 | InverseLink::Standard(StandardLink::Log)
1494 | InverseLink::Standard(StandardLink::Logit)
1495 | InverseLink::Standard(StandardLink::Probit)
1496 | InverseLink::Standard(StandardLink::CLogLog)
1497 )
1498}
1499
1500pub fn require_inverse_link_supports_joint_wiggle(
1501 link: &InverseLink,
1502 context: &str,
1503) -> Result<(), String> {
1504 if inverse_link_supports_joint_wiggle(link) {
1505 Ok(())
1506 } else {
1507 Err(joint_wiggle_unsupported_link_message(context))
1508 }
1509}
1510
1511pub const fn binomial_inverse_link_supports_joint_wiggle(link: &InverseLink) -> bool {
1512 matches!(
1513 link,
1514 InverseLink::Standard(StandardLink::Logit)
1515 | InverseLink::Standard(StandardLink::Probit)
1516 | InverseLink::Standard(StandardLink::CLogLog)
1517 )
1518}
1519
1520pub fn require_binomial_inverse_link_supports_joint_wiggle(
1521 link: &InverseLink,
1522 context: &str,
1523) -> Result<(), String> {
1524 if binomial_inverse_link_supports_joint_wiggle(link) {
1525 Ok(())
1526 } else {
1527 Err(FormulaDslError::IncompatibleTerm {
1528 reason: format!(
1529 "{context} does not support identity, log, latent-cloglog, SAS, BetaLogistic, or Mixture links; wiggle is only available for jointly fitted logit/probit/cloglog links"
1530 ),
1531 }
1532 .into())
1533 }
1534}
1535
1536pub fn joint_wiggle_unsupported_link_message(context: &str) -> String {
1537 format!(
1538 "{context} does not support latent-cloglog, SAS, BetaLogistic, or Mixture links; wiggle is only available for jointly fitted standard links"
1539 )
1540}
1541
1542pub fn option_usize(map: &BTreeMap<String, String>, key: &str) -> Option<usize> {
1547 map.get(key).and_then(|v| v.parse::<usize>().ok())
1548}
1549
1550fn validate_known_term_options(
1557 term_name: &str,
1558 options: &BTreeMap<String, String>,
1559 known: &[&str],
1560 raw: &str,
1561) -> Result<(), String> {
1562 let known_set: std::collections::BTreeSet<&&str> = known.iter().collect();
1563 for key in options.keys() {
1564 if !known_set.contains(&key.as_str()) {
1565 let known_sorted = {
1566 let mut v = known.to_vec();
1567 v.sort_unstable();
1568 v.join(", ")
1569 };
1570 let known_hint = if known.is_empty() {
1571 "no options".to_string()
1572 } else {
1573 format!("[{known_sorted}]")
1574 };
1575 return Err(FormulaDslError::InvalidArgument {
1576 reason: format!(
1577 "{term_name}() does not accept option `{key}` (in `{raw}`); known options: {known_hint}"
1578 ),
1579 }
1580 .into());
1581 }
1582 }
1583 Ok(())
1584}
1585
1586pub fn option_usize_any(map: &BTreeMap<String, String>, keys: &[&str]) -> Option<usize> {
1587 for key in keys {
1588 if let Some(v) = option_usize(map, key) {
1589 return Some(v);
1590 }
1591 }
1592 None
1593}
1594
1595pub fn option_usize_strict(
1602 map: &BTreeMap<String, String>,
1603 key: &str,
1604) -> Result<Option<usize>, String> {
1605 match map.get(key) {
1606 None => Ok(None),
1607 Some(raw) => raw.parse::<usize>().map(Some).map_err(|err| {
1608 FormulaDslError::InvalidArgument {
1609 reason: format!(
1610 "option `{key}={raw}` is not a non-negative integer; \
1611 expected a whole number >= 0: {err}"
1612 ),
1613 }
1614 .into()
1615 }),
1616 }
1617}
1618
1619pub fn option_usize_any_strict(
1622 map: &BTreeMap<String, String>,
1623 keys: &[&str],
1624) -> Result<Option<usize>, String> {
1625 for key in keys {
1626 if let Some(v) = option_usize_strict(map, key)? {
1627 return Ok(Some(v));
1628 }
1629 }
1630 Ok(None)
1631}
1632
1633pub fn option_f64(map: &BTreeMap<String, String>, key: &str) -> Option<f64> {
1634 map.get(key).and_then(|v| v.parse::<f64>().ok())
1635}
1636
1637pub fn option_f64_strict(map: &BTreeMap<String, String>, key: &str) -> Result<Option<f64>, String> {
1641 match map.get(key) {
1642 None => Ok(None),
1643 Some(raw) => match raw.parse::<f64>() {
1644 Ok(v) if v.is_finite() => Ok(Some(v)),
1645 Ok(v) => Err(FormulaDslError::InvalidArgument {
1646 reason: format!("option `{key}={raw}` parses as {v} which is not a finite number"),
1647 }
1648 .into()),
1649 Err(err) => Err(FormulaDslError::InvalidArgument {
1650 reason: format!(
1651 "option `{key}={raw}` is not a valid number; expected a finite decimal: {err}"
1652 ),
1653 }
1654 .into()),
1655 },
1656 }
1657}
1658
1659pub fn option_bool(map: &BTreeMap<String, String>, key: &str) -> Option<bool> {
1660 map.get(key)
1661 .and_then(|v| match v.trim().to_ascii_lowercase().as_str() {
1662 "true" | "1" | "yes" | "y" => Some(true),
1663 "false" | "0" | "no" | "n" => Some(false),
1664 _ => None,
1665 })
1666}
1667
1668pub fn option_bool_strict(
1674 map: &BTreeMap<String, String>,
1675 key: &str,
1676) -> Result<Option<bool>, String> {
1677 match map.get(key) {
1678 None => Ok(None),
1679 Some(raw) => match raw.trim().to_ascii_lowercase().as_str() {
1680 "true" | "1" | "yes" | "y" => Ok(Some(true)),
1681 "false" | "0" | "no" | "n" => Ok(Some(false)),
1682 _ => Err(FormulaDslError::InvalidArgument {
1683 reason: format!(
1684 "option `{key}={raw}` is not a boolean; \
1685 expected one of true/false/yes/no/1/0"
1686 ),
1687 }
1688 .into()),
1689 },
1690 }
1691}
1692
1693pub fn strip_quotes(v: &str) -> &str {
1694 let b = v.as_bytes();
1695 if b.len() >= 2
1696 && ((b[0] == b'\'' && b[b.len() - 1] == b'\'') || (b[0] == b'"' && b[b.len() - 1] == b'"'))
1697 {
1698 &v[1..v.len() - 1]
1699 } else {
1700 v
1701 }
1702}
1703
1704fn parse_linear_constraint_bounds(
1709 options: &BTreeMap<String, String>,
1710 raw: &str,
1711) -> Result<(Option<f64>, Option<f64>), String> {
1712 let min = parse_optional_f64_option_alias(options, &["min", "lower"], raw, "linear")?;
1713 let max = parse_optional_f64_option_alias(options, &["max", "upper"], raw, "linear")?;
1714 if let (Some(min), Some(max)) = (min, max)
1715 && (!min.is_finite() || !max.is_finite() || min > max)
1716 {
1717 return Err(FormulaDslError::InvalidArgument {
1718 reason: format!(
1719 "linear coefficient constraints require finite min <= max, got min={min}, max={max}: {raw}"
1720 ),
1721 }
1722 .into());
1723 }
1724 Ok((min, max))
1725}
1726
1727fn parse_required_f64_option(
1728 options: &BTreeMap<String, String>,
1729 key: &str,
1730 raw: &str,
1731) -> Result<f64, String> {
1732 let value = options
1733 .get(key)
1734 .ok_or_else(|| FormulaDslError::MalformedConfig {
1735 reason: format!("bounded() is missing required '{key}' argument: {raw}"),
1736 })?;
1737 value.parse::<f64>().map_err(|err| {
1738 FormulaDslError::InvalidArgument {
1739 reason: format!(
1740 "bounded() argument '{key}' must be a finite number, got '{}': {err}: {raw}",
1741 value
1742 ),
1743 }
1744 .into()
1745 })
1746}
1747
1748fn parse_optional_f64_option(
1749 options: &BTreeMap<String, String>,
1750 key: &str,
1751 raw: &str,
1752) -> Result<Option<f64>, String> {
1753 match options.get(key) {
1754 Some(value) => value.parse::<f64>().map(Some).map_err(|err| {
1755 FormulaDslError::InvalidArgument {
1756 reason: format!(
1757 "bounded() argument '{key}' must be a finite number, got '{}': {err}: {raw}",
1758 value
1759 ),
1760 }
1761 .into()
1762 }),
1763 None => Ok(None),
1764 }
1765}
1766
1767fn parse_optional_f64_option_alias(
1768 options: &BTreeMap<String, String>,
1769 keys: &[&str],
1770 raw: &str,
1771 fn_label: &str,
1772) -> Result<Option<f64>, String> {
1773 let mut found: Option<(&str, f64)> = None;
1774 for key in keys {
1775 if let Some(value) = options.get(*key) {
1776 let parsed = value
1777 .parse::<f64>()
1778 .map_err(|err| FormulaDslError::InvalidArgument {
1779 reason: format!(
1780 "{fn_label}() argument '{key}' must be a finite number, got '{}': {err}: {raw}",
1781 value
1782 ),
1783 })?;
1784 if found.is_some() {
1785 return Err(FormulaDslError::IncompatibleTerm {
1786 reason: format!(
1787 "{fn_label}() cannot specify both '{}' and '{}': {raw}",
1788 found.expect("present").0,
1789 key
1790 ),
1791 }
1792 .into());
1793 }
1794 found = Some((key, parsed));
1795 }
1796 }
1797 Ok(found.map(|(_, v)| v))
1798}
1799
1800fn parse_linkwiggle_penalty_orders(raw: Option<&str>) -> Result<Vec<usize>, String> {
1801 let Some(raw) = raw.map(str::trim) else {
1802 return Ok(WigglePenaltyConfig::cubic_triple_operator_default().penalty_orders);
1803 };
1804 if raw.is_empty() {
1805 return Ok(WigglePenaltyConfig::cubic_triple_operator_default().penalty_orders);
1806 }
1807 let mut out = Vec::<usize>::new();
1808 for token in raw.split(',') {
1809 let t = token.trim().to_ascii_lowercase();
1810 if t.is_empty() {
1811 continue;
1812 }
1813 match t.as_str() {
1814 "all" => {
1815 out.extend([1, 2, 3]);
1816 }
1817 "slope" | "1" => out.push(1),
1818 "curvature" | "2" => out.push(2),
1819 "curvature-change" | "curvature_change" | "3" => out.push(3),
1820 _ => {
1821 return Err(FormulaDslError::InvalidArgument {
1822 reason: format!(
1823 "invalid linkwiggle penalty_order '{t}'; use all|slope|curvature|curvature-change or 1/2/3"
1824 ),
1825 }
1826 .into());
1827 }
1828 }
1829 }
1830 if out.is_empty() {
1831 out.extend(WigglePenaltyConfig::cubic_triple_operator_default().penalty_orders);
1832 }
1833 out.sort_unstable();
1834 out.dedup();
1835 Ok(out)
1836}
1837
1838pub fn parse_linkwiggle_formulaspec(
1839 options: &BTreeMap<String, String>,
1840 raw: &str,
1841) -> Result<LinkWiggleFormulaSpec, String> {
1842 let allowed = [
1843 "degree",
1844 "internal_knots",
1845 "penalty_order",
1846 "double_penalty",
1847 ];
1848 let unknown = options
1849 .keys()
1850 .filter(|key| !allowed.contains(&key.as_str()))
1851 .cloned()
1852 .collect::<Vec<_>>();
1853 let term_name = raw.split('(').next().unwrap_or("linkwiggle");
1854 if !unknown.is_empty() {
1855 return Err(FormulaDslError::InvalidArgument {
1856 reason: format!(
1857 "{}() does not support option(s) {}: {raw}",
1858 term_name,
1859 unknown.join(", ")
1860 ),
1861 }
1862 .into());
1863 }
1864 let defaults = WigglePenaltyConfig::cubic_triple_operator_default();
1865 let degree = option_usize_strict(options, "degree")?.unwrap_or(defaults.degree);
1881 if degree < 1 {
1882 return Err(FormulaDslError::InvalidArgument {
1883 reason: format!("{term_name}() requires degree >= 1: {raw}"),
1884 }
1885 .into());
1886 }
1887 let num_internal_knots =
1888 option_usize_strict(options, "internal_knots")?.unwrap_or(defaults.num_internal_knots);
1889 if num_internal_knots == 0 {
1890 return Err(FormulaDslError::InvalidArgument {
1891 reason: format!("{term_name}() requires internal_knots > 0: {raw}"),
1892 }
1893 .into());
1894 }
1895 let penalty_orders =
1896 parse_linkwiggle_penalty_orders(options.get("penalty_order").map(String::as_str))?;
1897 let double_penalty =
1898 option_bool_strict(options, "double_penalty")?.unwrap_or(defaults.double_penalty);
1899 Ok(LinkWiggleFormulaSpec {
1900 degree,
1901 num_internal_knots,
1902 penalty_orders,
1903 double_penalty,
1904 })
1905}
1906
1907fn parse_link_formulaspec(
1908 options: &BTreeMap<String, String>,
1909 raw: &str,
1910) -> Result<LinkFormulaSpec, String> {
1911 let link = options
1912 .get("type")
1913 .map(|s| s.trim().to_string())
1914 .ok_or_else(|| FormulaDslError::MalformedConfig {
1915 reason: format!("link() requires type=<link-name>: {raw}"),
1916 })?;
1917 if link.is_empty() {
1918 return Err(FormulaDslError::MalformedConfig {
1919 reason: format!("link() requires a non-empty type: {raw}"),
1920 }
1921 .into());
1922 }
1923 let mixture_rho = options.get("rho").map(|s| s.trim().to_string());
1924 let sas_init = options.get("sas_init").map(|s| s.trim().to_string());
1925 let beta_logistic_init = options
1926 .get("beta_logistic_init")
1927 .map(|s| s.trim().to_string());
1928 Ok(LinkFormulaSpec {
1929 link,
1930 mixture_rho,
1931 sas_init,
1932 beta_logistic_init,
1933 })
1934}
1935
1936fn parse_survival_formulaspec(
1937 options: &BTreeMap<String, String>,
1938 raw: &str,
1939) -> Result<SurvivalFormulaSpec, String> {
1940 if options.is_empty() {
1941 return Err(FormulaDslError::MalformedConfig {
1942 reason: format!(
1943 "survmodel() requires at least one named option (e.g., spec=..., distribution=...): {raw}"
1944 ),
1945 }
1946 .into());
1947 }
1948 Ok(SurvivalFormulaSpec {
1949 spec: options.get("spec").map(|s| s.trim().to_string()),
1950 survival_distribution: options.get("distribution").map(|s| s.trim().to_string()),
1951 })
1952}
1953
1954fn parse_bounded_priorspec(
1955 options: &BTreeMap<String, String>,
1956 min: f64,
1957 max: f64,
1958 raw: &str,
1959) -> Result<BoundedCoefficientPriorSpec, String> {
1960 let prior_mode = options.get("prior").map(|s| s.to_ascii_lowercase());
1961 let pull = options.get("pull").map(|s| s.to_ascii_lowercase());
1962 let target = parse_optional_f64_option(options, "target", raw)?;
1963 let strength = parse_optional_f64_option(options, "strength", raw)?;
1964
1965 let target_mode = target.is_some() || strength.is_some();
1966 if prior_mode.is_some() && pull.is_some() {
1967 return Err(FormulaDslError::IncompatibleTerm {
1968 reason: format!("bounded() cannot combine prior=... with pull=...: {raw}"),
1969 }
1970 .into());
1971 }
1972 if prior_mode.is_some() && target_mode {
1973 return Err(FormulaDslError::IncompatibleTerm {
1974 reason: format!("bounded() cannot combine prior=... with target/strength: {raw}"),
1975 }
1976 .into());
1977 }
1978 if pull.is_some() && target_mode {
1979 return Err(FormulaDslError::IncompatibleTerm {
1980 reason: format!("bounded() cannot combine pull=... with target/strength: {raw}"),
1981 }
1982 .into());
1983 }
1984
1985 if let Some(priorname) = prior_mode {
1986 return match priorname.as_str() {
1987 "none" => Ok(BoundedCoefficientPriorSpec::None),
1988 "uniform" | "log-jacobian" | "log_jacobian" | "jacobian" => {
1989 Ok(BoundedCoefficientPriorSpec::Uniform)
1990 }
1991 "center" => Ok(BoundedCoefficientPriorSpec::Beta { a: 2.0, b: 2.0 }),
1992 _ => Err(FormulaDslError::InvalidArgument {
1993 reason: format!(
1994 "bounded() prior must currently be one of none|uniform|log-jacobian|center, got '{}': {raw}",
1995 priorname
1996 ),
1997 }
1998 .into()),
1999 };
2000 }
2001
2002 if let Some(pull_mode) = pull {
2003 return match pull_mode.as_str() {
2004 "uniform" | "log-jacobian" | "log_jacobian" | "jacobian" => {
2005 Ok(BoundedCoefficientPriorSpec::Uniform)
2006 }
2007 "center" => Ok(BoundedCoefficientPriorSpec::Beta { a: 2.0, b: 2.0 }),
2008 _ => Err(FormulaDslError::InvalidArgument {
2009 reason: format!(
2010 "bounded() pull must currently be 'uniform'/'log-jacobian' or 'center', got '{}': {raw}",
2011 pull_mode
2012 ),
2013 }
2014 .into()),
2015 };
2016 }
2017
2018 if target_mode {
2019 let targetvalue = target.ok_or_else(|| FormulaDslError::MalformedConfig {
2020 reason: format!("bounded() target is required with strength: {raw}"),
2021 })?;
2022 let strengthvalue = strength.ok_or_else(|| FormulaDslError::MalformedConfig {
2023 reason: format!("bounded() strength is required with target: {raw}"),
2024 })?;
2025 if !(min < targetvalue && targetvalue < max) {
2026 return Err(FormulaDslError::InvalidArgument {
2027 reason: format!("bounded() target must lie strictly inside ({min}, {max}): {raw}"),
2028 }
2029 .into());
2030 }
2031 if !strengthvalue.is_finite() || strengthvalue <= 0.0 {
2032 return Err(FormulaDslError::InvalidArgument {
2033 reason: format!("bounded() strength must be finite and > 0: {raw}"),
2034 }
2035 .into());
2036 }
2037 let z = (targetvalue - min) / (max - min);
2038 let a = 1.0 + strengthvalue * z;
2039 let b = 1.0 + strengthvalue * (1.0 - z);
2040 return Ok(BoundedCoefficientPriorSpec::Beta { a, b });
2041 }
2042
2043 Ok(BoundedCoefficientPriorSpec::None)
2044}
2045
2046pub fn formula_rhs_text(formula: &str) -> Result<String, String> {
2051 let parsed = parse_formula_dsl(formula)?;
2052 if parsed.rhs_terms.is_empty() {
2053 return Err(FormulaDslError::ParseError {
2054 reason: "formula right-hand side cannot be empty".to_string(),
2055 }
2056 .into());
2057 }
2058 Ok(parsed.rhs_terms.join(" + "))
2059}
2060
2061pub fn parse_surv_response(
2068 lhs: &str,
2069) -> Result<Option<(Option<String>, String, String)>, FormulaDslError> {
2070 let trimmed = lhs.trim();
2071 let call = match parse_function_call(trimmed) {
2072 Ok(call) => call,
2073 Err(_) => return Ok(None),
2074 };
2075 if !call.name.eq_ignore_ascii_case("surv") {
2076 return Ok(None);
2077 }
2078 let vars = call
2079 .args
2080 .iter()
2081 .filter_map(|arg| match arg {
2082 CallArgSpec::Positional(v) => Some(v.trim().to_string()),
2083 CallArgSpec::Named { .. } => None,
2084 })
2085 .filter(|s| !s.is_empty())
2086 .collect::<Vec<_>>();
2087 match vars.len() {
2088 2 => Ok(Some((None, vars[0].clone(), vars[1].clone()))),
2092 3 => Ok(Some((
2093 Some(vars[0].clone()),
2094 vars[1].clone(),
2095 vars[2].clone(),
2096 ))),
2097 n => Err(FormulaDslError::InvalidArgument {
2098 reason: format!(
2099 "Surv(...) expects either Surv(time, event) (right-censored) or \
2100 Surv(entry, exit, event) (left-truncated); got {n} columns"
2101 ),
2102 }),
2103 }
2104}
2105
2106pub fn parse_surv_interval_response(
2121 lhs: &str,
2122) -> Result<Option<(String, String, String)>, FormulaDslError> {
2123 let trimmed = lhs.trim();
2124 let call = match parse_function_call(trimmed) {
2125 Ok(call) => call,
2126 Err(_) => return Ok(None),
2127 };
2128 if !call.name.eq_ignore_ascii_case("survinterval") {
2129 return Ok(None);
2130 }
2131 let vars = call
2132 .args
2133 .iter()
2134 .filter_map(|arg| match arg {
2135 CallArgSpec::Positional(v) => Some(v.trim().to_string()),
2136 CallArgSpec::Named { .. } => None,
2137 })
2138 .filter(|s| !s.is_empty())
2139 .collect::<Vec<_>>();
2140 match vars.len() {
2141 3 => Ok(Some((vars[0].clone(), vars[1].clone(), vars[2].clone()))),
2142 n => Err(FormulaDslError::InvalidArgument {
2143 reason: format!(
2144 "SurvInterval(...) expects SurvInterval(L, R, event) (interval-censored, the \
2145 observed bracket T ∈ (L, R]); got {n} columns"
2146 ),
2147 }),
2148 }
2149}
2150
2151fn top_level_formula_separator(input: &str) -> Result<Option<usize>, String> {
2152 let mut depth = 0_i32;
2153 let mut in_single = false;
2154 let mut in_double = false;
2155
2156 for (idx, ch) in input.char_indices() {
2157 match ch {
2158 '\'' if !in_double => in_single = !in_single,
2159 '"' if !in_single => in_double = !in_double,
2160 '(' | '[' | '{' if !in_single && !in_double => depth += 1,
2161 ')' | ']' | '}' if !in_single && !in_double && depth > 0 => depth -= 1,
2162 '~' if !in_single && !in_double && depth == 0 => return Ok(Some(idx)),
2163 _ => {}
2164 }
2165 }
2166
2167 if in_single || in_double || depth != 0 {
2168 return Err(FormulaDslError::ParseError {
2169 reason: "invalid auxiliary formula syntax: unbalanced parentheses or quotes"
2170 .to_string(),
2171 }
2172 .into());
2173 }
2174 Ok(None)
2175}
2176
2177pub fn parse_matching_auxiliary_formula(
2178 formula: &str,
2179 response: &str,
2180 flag_name: &str,
2181) -> Result<(String, ParsedFormula), FormulaDslError> {
2182 let rhs = formula.trim();
2183 if top_level_formula_separator(rhs)?.is_some() {
2184 return Err(FormulaDslError::InvalidArgument {
2185 reason: format!(
2186 "{flag_name} expects only the terms after '~', not a full 'response ~ terms' formula; use {flag_name} 's(x)' instead of {flag_name} 'y ~ s(x)' (or pass '1' for an intercept-only noise model)"
2187 ),
2188 });
2189 }
2190 let parsed_formula = parse_formula(&format!("{response} ~ {rhs}"))?;
2191 Ok((rhs.to_string(), parsed_formula))
2192}
2193
2194pub fn validate_auxiliary_formula_controls(
2195 parsed_formula: &ParsedFormula,
2196 flag_name: &str,
2197) -> Result<(), String> {
2198 if parsed_formula.linkwiggle.is_some() {
2199 return Err(FormulaDslError::IncompatibleTerm {
2200 reason: format!(
2201 "linkwiggle(...) is only supported in the main formula, not {flag_name}"
2202 ),
2203 }
2204 .into());
2205 }
2206 if parsed_formula.timewiggle.is_some() {
2207 return Err(FormulaDslError::IncompatibleTerm {
2208 reason: format!(
2209 "timewiggle(...) is only supported in the main survival formula, not {flag_name}"
2210 ),
2211 }
2212 .into());
2213 }
2214 if parsed_formula.linkspec.is_some() {
2215 return Err(FormulaDslError::IncompatibleTerm {
2216 reason: format!("link(...) is only supported in the main formula, not {flag_name}"),
2217 }
2218 .into());
2219 }
2220 if parsed_formula.survivalspec.is_some() {
2221 return Err(FormulaDslError::IncompatibleTerm {
2222 reason: format!(
2223 "survmodel(...) is only supported in the main survival formula, not {flag_name}"
2224 ),
2225 }
2226 .into());
2227 }
2228 if !parsed_formula.logslope_surfaces.is_empty() && flag_name != "--logslope-formula" {
2229 return Err(FormulaDslError::IncompatibleTerm {
2230 reason: format!(
2231 "logslope(...) is only supported in --logslope-formula, not {flag_name}"
2232 ),
2233 }
2234 .into());
2235 }
2236 Ok::<(), _>(())
2237}
2238
2239pub fn parse_formula(formula: &str) -> Result<ParsedFormula, FormulaDslError> {
2240 let parsed_dsl =
2241 parse_formula_dsl(formula).map_err(|reason| FormulaDslError::ParseError { reason })?;
2242 let lhs = parsed_dsl.response_expr.trim();
2243 if lhs.is_empty() {
2244 return Err(FormulaDslError::ParseError {
2245 reason: "formula response (left-hand side) cannot be empty".to_string(),
2246 });
2247 }
2248 let mut terms = Vec::<ParsedTerm>::new();
2249 let mut linkwiggle: Option<LinkWiggleFormulaSpec> = None;
2250 let mut timewiggle: Option<LinkWiggleFormulaSpec> = None;
2251 let mut linkspec: Option<LinkFormulaSpec> = None;
2252 let mut survivalspec: Option<SurvivalFormulaSpec> = None;
2253 let mut logslope_surfaces = Vec::<LogSlopeSurfaceSpec>::new();
2254 let mut seen_term_keys: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
2259 let mut expanded_terms = Vec::<String>::new();
2260 for raw in parsed_dsl.rhs_terms {
2261 let trimmed = raw.trim();
2262 if trimmed.is_empty() {
2263 expanded_terms.push(String::new());
2264 continue;
2265 }
2266 let is_call = parse_function_call(trimmed).is_ok();
2271 let needs_expansion = !is_call
2272 && trimmed
2273 .chars()
2274 .scan(0i32, |depth, ch| {
2275 let d_before = *depth;
2276 match ch {
2277 '(' | '[' | '{' => *depth += 1,
2278 ')' | ']' | '}' if *depth > 0 => *depth -= 1,
2279 _ => {}
2280 }
2281 Some((d_before, ch))
2282 })
2283 .any(|(d, ch)| d == 0 && matches!(ch, ':' | '*' | '/' | '^'));
2284 if needs_expansion {
2285 for atoms in
2286 expand_wr_term(trimmed).map_err(|reason| FormulaDslError::ParseError { reason })?
2287 {
2288 if atoms.is_empty() {
2289 continue;
2290 }
2291 expanded_terms.push(atoms.join(":"));
2292 }
2293 } else {
2294 expanded_terms.push(trimmed.to_string());
2295 }
2296 }
2297
2298 for raw in expanded_terms {
2299 let t = raw.trim();
2300 if t.is_empty() || t == "1" {
2301 continue;
2302 }
2303 if t == "0" || t == "-1" {
2304 return Err(FormulaDslError::IncompatibleTerm {
2305 reason: "formula terms '0'/'-1' (intercept removal) are not supported yet"
2306 .to_string(),
2307 });
2308 }
2309 let key: String = {
2313 let mut acc = String::with_capacity(t.len());
2314 let mut in_single = false;
2315 let mut in_double = false;
2316 for ch in t.chars() {
2317 match ch {
2318 '\'' if !in_double => {
2319 in_single = !in_single;
2320 acc.push(ch);
2321 }
2322 '"' if !in_single => {
2323 in_double = !in_double;
2324 acc.push(ch);
2325 }
2326 c if c.is_whitespace() && !in_single && !in_double => {}
2327 _ => acc.push(ch),
2328 }
2329 }
2330 acc
2331 };
2332 if !seen_term_keys.insert(key.clone()) {
2333 return Err(FormulaDslError::IncompatibleTerm {
2334 reason: format!(
2335 "formula `{formula}` lists term `{t}` more than once. \
2336 Duplicate terms produce a rank-deficient design; \
2337 keep one copy or differentiate them (e.g. distinct k=, bs= options)."
2338 ),
2339 });
2340 }
2341 match parse_term(t)? {
2342 ParsedTerm::LinkWiggle { options } => {
2343 if linkwiggle.is_some() {
2344 return Err(FormulaDslError::IncompatibleTerm {
2345 reason: "formula can include at most one linkwiggle(...) term".to_string(),
2346 });
2347 }
2348 linkwiggle = Some(parse_linkwiggle_formulaspec(&options, t)?);
2349 }
2350 ParsedTerm::TimeWiggle { options } => {
2351 if timewiggle.is_some() {
2352 return Err(FormulaDslError::IncompatibleTerm {
2353 reason: "formula can include at most one timewiggle(...) term".to_string(),
2354 });
2355 }
2356 timewiggle = Some(parse_linkwiggle_formulaspec(&options, t)?);
2357 }
2358 ParsedTerm::LinkConfig { options } => {
2359 if linkspec.is_some() {
2360 return Err(FormulaDslError::IncompatibleTerm {
2361 reason: "formula can include at most one link(...) term".to_string(),
2362 });
2363 }
2364 linkspec = Some(parse_link_formulaspec(&options, t)?);
2365 }
2366 ParsedTerm::SurvivalConfig { options } => {
2367 if survivalspec.is_some() {
2368 return Err(FormulaDslError::IncompatibleTerm {
2369 reason: "formula can include at most one survmodel(...) term".to_string(),
2370 });
2371 }
2372 survivalspec = Some(parse_survival_formulaspec(&options, t)?);
2373 }
2374 ParsedTerm::LogSlopeSurface { z_column, terms } => {
2375 logslope_surfaces.push(LogSlopeSurfaceSpec { z_column, terms });
2376 }
2377 other => terms.push(other),
2378 }
2379 }
2380 if lhs.chars().all(|c| c.is_alphanumeric() || c == '_')
2386 && !lhs.is_empty()
2387 && parsed_terms_reference_column(&terms, lhs)
2388 {
2389 return Err(FormulaDslError::IncompatibleTerm {
2390 reason: format!(
2391 "formula `{formula}` uses response column `{lhs}` as its own predictor. \
2392 This fits y as a function of itself and is almost certainly a typo. \
2393 Drop the term that mentions `{lhs}` from the right-hand side."
2394 ),
2395 });
2396 }
2397 Ok(ParsedFormula {
2398 response: lhs.to_string(),
2399 terms,
2400 logslope_surfaces,
2401 linkwiggle,
2402 timewiggle,
2403 linkspec,
2404 survivalspec,
2405 })
2406}
2407
2408pub fn parse_term(raw: &str) -> Result<ParsedTerm, String> {
2409 fn split_call_args(call: &FunctionCallSpec) -> (Vec<String>, BTreeMap<String, String>) {
2410 let mut vars = Vec::<String>::new();
2411 let mut options = BTreeMap::<String, String>::new();
2412 for arg in &call.args {
2413 match arg {
2414 CallArgSpec::Positional(v) => vars.push(v.trim().to_string()),
2415 CallArgSpec::Named { key, value } => {
2416 options.insert(key.to_ascii_lowercase(), strip_quotes(value).to_string());
2417 }
2418 }
2419 }
2420 (vars, options)
2421 }
2422
2423 if raw.contains(':')
2427 && !raw.contains('(')
2428 && raw.split(':').all(|piece| is_exact_ident(piece.trim()))
2429 {
2430 let vars: Vec<String> = raw
2431 .split(':')
2432 .map(|piece| piece.trim().to_string())
2433 .collect();
2434 if vars.len() >= 2 {
2435 let mut sorted = vars.clone();
2436 sorted.sort();
2437 sorted.dedup();
2438 if sorted.len() != vars.len() {
2439 return Err(FormulaDslError::IncompatibleTerm {
2440 reason: format!(
2441 "interaction term `{raw}` references the same variable more than once"
2442 ),
2443 }
2444 .into());
2445 }
2446 return Ok(ParsedTerm::Interaction { vars: sorted });
2447 }
2448 }
2449
2450 let call = parse_function_call(raw).ok();
2451 if let Some(call) = call {
2452 let name = call.name.to_ascii_lowercase();
2453 let (vars, mut options) = split_call_args(&call);
2454 match name.as_str() {
2455 "constrain" | "constraint" | "box" => {
2456 if vars.len() != 1 {
2457 return Err(FormulaDslError::InvalidArgument {
2458 reason: format!(
2459 "constrain()/constraint()/box() expects exactly one variable: {raw}"
2460 ),
2461 }
2462 .into());
2463 }
2464 validate_known_term_options(
2465 "constrain",
2466 &options,
2467 &["min", "lower", "max", "upper"],
2468 raw,
2469 )?;
2470 let (coefficient_min, coefficient_max) =
2471 parse_linear_constraint_bounds(&options, raw)?;
2472 if coefficient_min.is_none() && coefficient_max.is_none() {
2473 return Err(FormulaDslError::MalformedConfig {
2474 reason: format!(
2475 "constrain()/constraint()/box() requires at least one of min/lower/max/upper: {raw}"
2476 ),
2477 }
2478 .into());
2479 }
2480 return Ok(ParsedTerm::Linear {
2481 name: vars[0].clone(),
2482 explicit: true,
2483 coefficient_min,
2484 coefficient_max,
2485 });
2486 }
2487 "nonnegative" | "nonnegative_coef" => {
2488 if vars.len() != 1 {
2489 return Err(FormulaDslError::InvalidArgument {
2490 reason: format!("nonnegative() expects exactly one variable: {raw}"),
2491 }
2492 .into());
2493 }
2494 validate_known_term_options("nonnegative", &options, &[], raw)?;
2495 return Ok(ParsedTerm::Linear {
2496 name: vars[0].clone(),
2497 explicit: true,
2498 coefficient_min: Some(0.0),
2499 coefficient_max: None,
2500 });
2501 }
2502 "nonpositive" | "nonpositive_coef" => {
2503 if vars.len() != 1 {
2504 return Err(FormulaDslError::InvalidArgument {
2505 reason: format!("nonpositive() expects exactly one variable: {raw}"),
2506 }
2507 .into());
2508 }
2509 validate_known_term_options("nonpositive", &options, &[], raw)?;
2510 return Ok(ParsedTerm::Linear {
2511 name: vars[0].clone(),
2512 explicit: true,
2513 coefficient_min: None,
2514 coefficient_max: Some(0.0),
2515 });
2516 }
2517 "bounded" => {
2518 if vars.len() != 1 {
2519 return Err(FormulaDslError::InvalidArgument {
2520 reason: format!("bounded() expects exactly one variable: {raw}"),
2521 }
2522 .into());
2523 }
2524 validate_known_term_options(
2525 "bounded",
2526 &options,
2527 &["min", "max", "prior", "pull", "target", "strength"],
2528 raw,
2529 )?;
2530 let min = parse_required_f64_option(&options, "min", raw)?;
2531 let max = parse_required_f64_option(&options, "max", raw)?;
2532 if !min.is_finite() || !max.is_finite() || min >= max {
2533 return Err(FormulaDslError::InvalidArgument {
2534 reason: format!(
2535 "bounded() requires finite min < max, got min={min}, max={max}: {raw}"
2536 ),
2537 }
2538 .into());
2539 }
2540 let prior = parse_bounded_priorspec(&options, min, max, raw)?;
2541 return Ok(ParsedTerm::BoundedLinear {
2542 name: vars[0].clone(),
2543 min,
2544 max,
2545 prior,
2546 });
2547 }
2548 "group" | "re" | "factor" => {
2549 if vars.len() != 1 {
2550 return Err(FormulaDslError::InvalidArgument {
2551 reason: format!(
2552 "{name}() expects exactly one variable, got '{}': {raw}",
2553 vars.join(",")
2554 ),
2555 }
2556 .into());
2557 }
2558 return Ok(ParsedTerm::RandomEffect {
2559 name: vars[0].clone(),
2560 });
2561 }
2562 "tensor" | "interaction" | "te" => {
2563 if vars.len() < 2 {
2564 return Err(FormulaDslError::InvalidArgument {
2565 reason: format!(
2566 "tensor()/interaction()/te() requires at least two variables: {raw}"
2567 ),
2568 }
2569 .into());
2570 }
2571 return Ok(ParsedTerm::Smooth {
2572 label: raw.to_string(),
2573 vars,
2574 kind: SmoothKind::Te,
2575 options,
2576 });
2577 }
2578 "t2" => {
2579 if vars.len() < 2 {
2580 return Err(FormulaDslError::InvalidArgument {
2581 reason: format!("t2() requires at least two variables: {raw}"),
2582 }
2583 .into());
2584 }
2585 return Ok(ParsedTerm::Smooth {
2586 label: raw.to_string(),
2587 vars,
2588 kind: SmoothKind::T2,
2589 options,
2590 });
2591 }
2592 "ti" => {
2593 if vars.len() < 2 {
2600 return Err(FormulaDslError::InvalidArgument {
2601 reason: format!("ti() requires at least two variables: {raw}"),
2602 }
2603 .into());
2604 }
2605 return Ok(ParsedTerm::Smooth {
2606 label: raw.to_string(),
2607 vars,
2608 kind: SmoothKind::Ti,
2609 options,
2610 });
2611 }
2612 "fs" | "sz" => {
2613 if vars.len() != 2 {
2614 return Err(format!("{}() expects exactly two variables: {raw}", name));
2615 }
2616 options.insert("bs".to_string(), name.clone());
2617 return Ok(ParsedTerm::Smooth {
2618 label: raw.to_string(),
2619 vars,
2620 kind: SmoothKind::S,
2621 options,
2622 });
2623 }
2624 "thinplate" | "thin_plate" | "tps" => {
2625 if vars.len() < 2 {
2626 return Err(FormulaDslError::InvalidArgument {
2627 reason: format!(
2628 "thinplate()/thin_plate()/tps() requires at least two variables: {raw}"
2629 ),
2630 }
2631 .into());
2632 }
2633 options.insert("type".to_string(), "tps".to_string());
2634 return Ok(ParsedTerm::Smooth {
2635 label: raw.to_string(),
2636 vars,
2637 kind: SmoothKind::S,
2638 options,
2639 });
2640 }
2641 "smooth" | "s" | "cyclic" | "periodic" | "cc" | "cp" => {
2642 if vars.is_empty() {
2643 return Err(FormulaDslError::InvalidArgument {
2644 reason: format!("smooth()/s() requires at least one variable: {raw}"),
2645 }
2646 .into());
2647 }
2648 let bs_is_re = options
2654 .get("bs")
2655 .or_else(|| options.get("type"))
2656 .map(|v| {
2657 v.trim()
2658 .trim_matches(|c| c == '\'' || c == '"')
2659 .to_ascii_lowercase()
2660 })
2661 .as_deref()
2662 == Some("re");
2663 if bs_is_re && vars.len() == 1 {
2664 return Ok(ParsedTerm::RandomEffect {
2665 name: vars[0].clone(),
2666 });
2667 }
2668 if matches!(name.as_str(), "cyclic" | "periodic" | "cc" | "cp") {
2669 options.insert("type".to_string(), "cyclic".to_string());
2670 }
2671 if matches!(name.as_str(), "fs" | "sz") {
2672 options.insert("bs".to_string(), name.clone());
2673 }
2674 return Ok(ParsedTerm::Smooth {
2675 label: raw.to_string(),
2676 vars,
2677 kind: SmoothKind::S,
2678 options,
2679 });
2680 }
2681 "sphere" | "sos" | "spherical" | "s2" => {
2682 if vars.len() != 2 {
2689 return Err(FormulaDslError::InvalidArgument {
2690 reason: format!(
2691 "{name}() expects exactly two variables: latitude and longitude; got {} in {raw}",
2692 vars.len()
2693 ),
2694 }
2695 .into());
2696 }
2697 options.insert("type".to_string(), "sphere".to_string());
2698 return Ok(ParsedTerm::Smooth {
2699 label: raw.to_string(),
2700 vars,
2701 kind: SmoothKind::S,
2702 options,
2703 });
2704 }
2705 "mjs" | "measurejet" | "measure_jet" | "web" => {
2706 if vars.is_empty() {
2712 return Err(FormulaDslError::InvalidArgument {
2713 reason: format!("{name}() requires at least one variable: {raw}"),
2714 }
2715 .into());
2716 }
2717 options.insert("type".to_string(), "measurejet".to_string());
2718 return Ok(ParsedTerm::Smooth {
2719 label: raw.to_string(),
2720 vars,
2721 kind: SmoothKind::S,
2722 options,
2723 });
2724 }
2725 "curv" | "curvature" | "constant_curvature" | "mkappa" => {
2726 if vars.is_empty() {
2732 return Err(FormulaDslError::InvalidArgument {
2733 reason: format!("{name}() requires at least one variable: {raw}"),
2734 }
2735 .into());
2736 }
2737 options.insert("type".to_string(), "curvature".to_string());
2738 return Ok(ParsedTerm::Smooth {
2739 label: raw.to_string(),
2740 vars,
2741 kind: SmoothKind::S,
2742 options,
2743 });
2744 }
2745 "matern" => {
2746 if vars.is_empty() {
2747 return Err(FormulaDslError::InvalidArgument {
2748 reason: format!("matern() requires at least one variable: {raw}"),
2749 }
2750 .into());
2751 }
2752 options.insert("type".to_string(), "matern".to_string());
2753 return Ok(ParsedTerm::Smooth {
2754 label: raw.to_string(),
2755 vars,
2756 kind: SmoothKind::S,
2757 options,
2758 });
2759 }
2760 "duchon" => {
2761 if vars.is_empty() {
2762 return Err(FormulaDslError::InvalidArgument {
2763 reason: format!("duchon() requires at least one variable: {raw}"),
2764 }
2765 .into());
2766 }
2767 if option_bool(&options, "cyclic").unwrap_or(false)
2768 || option_bool(&options, "periodic").unwrap_or(false)
2769 {
2770 options.insert("cyclic".to_string(), "true".to_string());
2771 }
2772 options.insert("type".to_string(), "duchon".to_string());
2773 return Ok(ParsedTerm::Smooth {
2774 label: raw.to_string(),
2775 vars,
2776 kind: SmoothKind::S,
2777 options,
2778 });
2779 }
2780 "pca" => {
2781 if vars.is_empty() {
2782 return Err(FormulaDslError::InvalidArgument {
2783 reason: format!("pca() requires at least one variable: {raw}"),
2784 }
2785 .into());
2786 }
2787 options.insert("type".to_string(), "pca".to_string());
2788 return Ok(ParsedTerm::Smooth {
2789 label: raw.to_string(),
2790 vars,
2791 kind: SmoothKind::S,
2792 options,
2793 });
2794 }
2795 "linkwiggle" => {
2796 if !vars.is_empty() {
2797 return Err(FormulaDslError::InvalidArgument {
2798 reason: format!(
2799 "linkwiggle() takes named options only; positional args are not supported: {raw}"
2800 ),
2801 }
2802 .into());
2803 }
2804 return Ok(ParsedTerm::LinkWiggle { options });
2805 }
2806 "timewiggle" => {
2807 if !vars.is_empty() {
2808 return Err(FormulaDslError::InvalidArgument {
2809 reason: format!(
2810 "timewiggle() takes named options only; positional args are not supported: {raw}"
2811 ),
2812 }
2813 .into());
2814 }
2815 return Ok(ParsedTerm::TimeWiggle { options });
2816 }
2817 "link" => {
2818 if !vars.is_empty() {
2819 return Err(FormulaDslError::InvalidArgument {
2820 reason: format!(
2821 "link() takes named options only; positional args are not supported: {raw}"
2822 ),
2823 }
2824 .into());
2825 }
2826 return Ok(ParsedTerm::LinkConfig { options });
2827 }
2828 "survmodel" => {
2829 if !vars.is_empty() {
2830 return Err(FormulaDslError::InvalidArgument {
2831 reason: format!(
2832 "survmodel() takes named options only; positional args are not supported: {raw}"
2833 ),
2834 }
2835 .into());
2836 }
2837 return Ok(ParsedTerm::SurvivalConfig { options });
2838 }
2839 "logslope" | "log_slope" | "log_slope_surface" => {
2840 validate_known_term_options("logslope", &options, &[], raw)?;
2841 if vars.len() < 2 {
2842 return Err(FormulaDslError::InvalidArgument {
2843 reason: format!(
2844 "logslope() expects a z column followed by one or more RHS terms; add one logslope(z, ...) declaration per vector-z coordinate: {raw}"
2845 ),
2846 }
2847 .into());
2848 }
2849 let z_column = vars[0].trim();
2850 if !is_exact_ident(z_column) {
2851 return Err(FormulaDslError::InvalidArgument {
2852 reason: format!(
2853 "logslope() z column must be a bare column name, got `{z_column}` in {raw}"
2854 ),
2855 }
2856 .into());
2857 }
2858 let rhs = vars[1..].join(" + ");
2859 let parsed = parse_formula(&format!("__logslope__ ~ {rhs}"))?;
2860 if !parsed.logslope_surfaces.is_empty() {
2861 return Err(FormulaDslError::IncompatibleTerm {
2862 reason: format!(
2863 "logslope() declarations cannot be nested inside another logslope(): {raw}"
2864 ),
2865 }
2866 .into());
2867 }
2868 validate_auxiliary_formula_controls(&parsed, "logslope()")?;
2869 return Ok(ParsedTerm::LogSlopeSurface {
2870 z_column: z_column.to_string(),
2871 terms: parsed.terms,
2872 });
2873 }
2874 "linear" => {
2875 if vars.len() != 1 {
2876 return Err(FormulaDslError::InvalidArgument {
2877 reason: format!("linear() expects exactly one variable: {raw}"),
2878 }
2879 .into());
2880 }
2881 validate_known_term_options(
2882 "linear",
2883 &options,
2884 &["min", "lower", "max", "upper"],
2885 raw,
2886 )?;
2887 let (coefficient_min, coefficient_max) =
2888 parse_linear_constraint_bounds(&options, raw)?;
2889 return Ok(ParsedTerm::Linear {
2890 name: vars[0].clone(),
2891 explicit: true,
2892 coefficient_min,
2893 coefficient_max,
2894 });
2895 }
2896 _ => {
2897 return Err(format!(
2898 "unknown term function `{name}` in '{raw}'. Supported: bounded(), linear(), constrain()/constraint()/box(), nonnegative(), nonpositive(), smooth()/s(), cyclic()/periodic()/cc()/cp(), thinplate()/thin_plate()/tps(), tensor()/interaction()/te(), t2(), ti(), fs(), sz(), group()/re()/factor(), sphere()/sos()/spherical(), s2(), matern(), duchon(), pca(), logslope()/log_slope(), linkwiggle(), timewiggle(), link(), survmodel()"
2899 ));
2900 }
2901 }
2902 }
2903
2904 let ident = raw.trim();
2905 if !is_exact_ident(ident) {
2906 return Err(FormulaDslError::UnknownIdentifier {
2907 reason: format!("unsupported top-level RHS term: {raw}"),
2908 }
2909 .into());
2910 }
2911
2912 Ok(ParsedTerm::Linear {
2913 name: ident.to_string(),
2914 explicit: false,
2915 coefficient_min: None,
2916 coefficient_max: None,
2917 })
2918}
2919
2920pub fn parse_link_choice(
2925 raw: Option<&str>,
2926 flexible_flag: bool,
2927) -> Result<Option<LinkChoice>, FormulaDslError> {
2928 if raw.is_none() && !flexible_flag {
2929 return Ok(None);
2930 }
2931 let Some(v) = raw else {
2932 return Ok(Some(LinkChoice {
2933 mode: LinkMode::Flexible,
2934 link: LinkFunction::Probit,
2935 mixture_components: None,
2936 }));
2937 };
2938 let t = v.trim().to_ascii_lowercase();
2939 if let Some(inner) = t
2940 .strip_prefix("flexible(")
2941 .and_then(|s| s.strip_suffix(')'))
2942 {
2943 if let Some(components_inner) = inner
2944 .strip_prefix("blended(")
2945 .and_then(|s| s.strip_suffix(')'))
2946 .or_else(|| {
2947 inner
2948 .strip_prefix("mixture(")
2949 .and_then(|s| s.strip_suffix(')'))
2950 })
2951 {
2952 parse_link_component_list(components_inner)?;
2953 return Err(FormulaDslError::IncompatibleTerm {
2954 reason:
2955 "flexible(...) does not support blended(...)/mixture(...) links; wiggle is only supported for jointly fit standard links"
2956 .to_string(),
2957 });
2958 }
2959 let link = parse_linkname(inner)?;
2960 if !linkname_supports_joint_wiggle(link) {
2961 return Err(FormulaDslError::IncompatibleTerm {
2962 reason:
2963 "flexible(...) does not support sas/beta-logistic links; wiggle is only supported for jointly fit standard links"
2964 .to_string(),
2965 });
2966 }
2967 return Ok(Some(LinkChoice {
2968 mode: LinkMode::Flexible,
2969 link,
2970 mixture_components: None,
2971 }));
2972 }
2973 if let Some(inner) = t
2974 .strip_prefix("blended(")
2975 .and_then(|s| s.strip_suffix(')'))
2976 .or_else(|| t.strip_prefix("mixture(").and_then(|s| s.strip_suffix(')')))
2977 {
2978 if flexible_flag {
2979 return Err(FormulaDslError::IncompatibleTerm {
2980 reason:
2981 "--flexible-link cannot be combined with --link blended(...)/mixture(...); blended inverse links are not flexible-link mode"
2982 .to_string(),
2983 });
2984 }
2985 let components = parse_link_component_list(inner)?;
2986 return Ok(Some(LinkChoice {
2987 mode: LinkMode::Strict,
2988 link: LinkFunction::Logit,
2989 mixture_components: Some(components),
2990 }));
2991 }
2992
2993 let link = parse_linkname(&t)?;
2994 if flexible_flag && !linkname_supports_joint_wiggle(link) {
2995 return Err(FormulaDslError::IncompatibleTerm {
2996 reason:
2997 "--flexible-link does not support sas/beta-logistic links; wiggle is only supported for jointly fit standard links"
2998 .to_string(),
2999 });
3000 }
3001 Ok(Some(LinkChoice {
3002 mode: if flexible_flag {
3003 LinkMode::Flexible
3004 } else {
3005 LinkMode::Strict
3006 },
3007 link,
3008 mixture_components: None,
3009 }))
3010}
3011
3012pub fn parse_linkname(v: &str) -> Result<LinkFunction, FormulaDslError> {
3013 match v.trim() {
3014 "identity" => Ok(LinkFunction::Identity),
3015 "log" => Ok(LinkFunction::Log),
3016 "logit" | "binomial-logit" => Ok(LinkFunction::Logit),
3017 "probit" | "binomial-probit" => Ok(LinkFunction::Probit),
3018 "cloglog" | "binomial-cloglog" => Ok(LinkFunction::CLogLog),
3019 "sas" => Ok(LinkFunction::Sas),
3020 "beta-logistic" => Ok(LinkFunction::BetaLogistic),
3021 other => Err(FormulaDslError::UnknownIdentifier {
3022 reason: format!(
3023 "unsupported link type '{other}'; \
3024 use one of identity|log|logit|probit|cloglog|binomial-logit|binomial-probit|binomial-cloglog|sas|beta-logistic|blended(...)/mixture(...) or flexible(...). \
3025 Both `--link <type>` (CLI flag) and `link(type=<type>)` (formula term) accept the same set."
3026 ),
3027 }),
3028 }
3029}
3030
3031pub fn parse_link_component(v: &str) -> Result<LinkComponent, String> {
3032 match v.trim() {
3033 "logit" => Ok(LinkComponent::Logit),
3034 "probit" => Ok(LinkComponent::Probit),
3035 "cloglog" => Ok(LinkComponent::CLogLog),
3036 "loglog" => Ok(LinkComponent::LogLog),
3037 "cauchit" => Ok(LinkComponent::Cauchit),
3038 other => Err(FormulaDslError::UnknownIdentifier {
3039 reason: format!(
3040 "unsupported blended-link component '{other}'; use probit|logit|cloglog|loglog|cauchit"
3041 ),
3042 }
3043 .into()),
3044 }
3045}
3046
3047pub fn parse_link_component_list(v: &str) -> Result<Vec<LinkComponent>, String> {
3048 let mut out = Vec::new();
3049 for part in v.split(',') {
3050 let trimmed = part.trim();
3051 if trimmed.is_empty() {
3052 continue;
3053 }
3054 let comp = parse_link_component(trimmed)?;
3055 if out.contains(&comp) {
3056 return Err(FormulaDslError::IncompatibleTerm {
3057 reason: "blended(...) cannot contain duplicate components".to_string(),
3058 }
3059 .into());
3060 }
3061 out.push(comp);
3062 }
3063 if out.len() < 2 {
3064 return Err(FormulaDslError::InvalidArgument {
3065 reason: "blended(...) requires at least two components".to_string(),
3066 }
3067 .into());
3068 }
3069 Ok(out)
3070}