1use std::collections::BTreeMap;
2
3use pest::Parser;
4use pest::iterators::Pair;
5use pest_derive::Parser;
6
7use crate::smooth::BoundedCoefficientPriorSpec;
8use gam_problem::types::{
9 InverseLink, LikelihoodSpec, LinkComponent, LinkFunction, StandardLink, WigglePenaltyConfig,
10};
11
12#[derive(Parser)]
13#[grammar_inline = r#"
14WHITESPACE = _{ " " | "\t" | NEWLINE }
15
16top_function_call = { SOI ~ function_call ~ EOI }
17top_expr = { SOI ~ expr ~ EOI }
18formula = { SOI ~ expr ~ "~" ~ rhs ~ EOI }
19rhs = { term ~ ("+" ~ term)* }
20term = { expr }
21
22expr = { sum }
23sum = { product ~ (add_op ~ product)* }
24add_op = { "+" | "-" }
25product = { interact ~ (mul_op ~ interact)* }
26mul_op = { "*" | "/" }
27interact = { power ~ (interact_op ~ power)* }
28interact_op = { ":" }
29power = { unary ~ (pow_op ~ unary)* }
30pow_op = { "^" }
31unary = { unary_op* ~ primary }
32unary_op = _{ "+" | "-" }
33
34primary = { function_call | list_lit | tuple_lit | ident | number | string_lit | "(" ~ expr ~ ")" }
35list_lit = @{ "[" ~ (!"]" ~ ANY)* ~ "]" }
36tuple_lit = @{ "(" ~ (!("," | ")") ~ ANY)+ ~ "," ~ (!")" ~ ANY)* ~ ")" }
37function_call = { ident ~ "(" ~ arg_list? ~ ")" }
38arg_list = { arg ~ ("," ~ arg)* }
39arg = { named_arg | expr }
40named_arg = { ident ~ "=" ~ expr }
41
42ident = @{ ident_start ~ ident_continue* }
43ident_start = _{ ASCII_ALPHA | "_" }
44ident_continue = _{ ASCII_ALPHANUMERIC | "_" | "." }
45
46number = @{
47 "-"?
48 ~ (ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT*)? | "." ~ ASCII_DIGIT+)
49 ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
50}
51
52string_lit = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" | "'" ~ (!"'" ~ ANY)* ~ "'" }
53"#]
54struct FormulaParser;
55
56#[derive(Clone, Debug, PartialEq, Eq)]
57pub struct FormulaDslParse {
58 pub response_expr: String,
59 pub rhs_terms: Vec<String>,
60}
61
62#[derive(Clone, Debug, PartialEq, Eq)]
63pub enum CallArgSpec {
64 Positional(String),
65 Named { key: String, value: String },
66}
67
68#[derive(Clone, Debug, PartialEq, Eq)]
69pub struct FunctionCallSpec {
70 pub name: String,
71 pub args: Vec<CallArgSpec>,
72}
73
74#[derive(Clone, Debug, PartialEq, Eq)]
85pub enum FormulaDslError {
86 ParseError { reason: String },
90 UnknownIdentifier { reason: String },
94 InvalidArgument { reason: String },
97 IncompatibleTerm { reason: String },
101 MalformedConfig { reason: String },
105}
106
107gam_linalg::impl_reason_error_boilerplate! {
108 FormulaDslError {
109 ParseError,
110 UnknownIdentifier,
111 InvalidArgument,
112 IncompatibleTerm,
113 MalformedConfig,
114 }
115}
116
117impl From<String> for FormulaDslError {
123 fn from(reason: String) -> Self {
124 FormulaDslError::ParseError { reason }
125 }
126}
127
128pub fn parse_formula_dsl(formula: &str) -> Result<FormulaDslParse, String> {
129 validate_balanced_delimiters(formula, "invalid formula syntax")?;
130 let mut parsed =
131 FormulaParser::parse(Rule::formula, formula).map_err(|e| FormulaDslError::ParseError {
132 reason: format!("invalid formula syntax: {e}"),
133 })?;
134 let formula_pair = parsed.next().ok_or_else(|| FormulaDslError::ParseError {
135 reason: "invalid formula syntax: empty parse".to_string(),
136 })?;
137
138 let mut response_expr: Option<String> = None;
139 let mut rhs_terms: Option<Vec<String>> = None;
140
141 for part in formula_pair.into_inner() {
142 match part.as_rule() {
143 Rule::expr if response_expr.is_none() => {
144 response_expr = Some(part.as_str().trim().to_string());
145 }
146 Rule::rhs => {
147 rhs_terms = Some(extract_rhs_terms(part)?);
148 }
149 _ => {}
150 }
151 }
152
153 let response_expr = response_expr.ok_or_else(|| FormulaDslError::ParseError {
154 reason: "invalid formula: missing response expression".to_string(),
155 })?;
156 let rhs_terms = rhs_terms.ok_or_else(|| FormulaDslError::ParseError {
157 reason: "invalid formula: missing RHS terms".to_string(),
158 })?;
159 if rhs_terms.is_empty() {
160 return Err(FormulaDslError::ParseError {
161 reason: "formula has no usable terms".to_string(),
162 }
163 .into());
164 }
165
166 Ok(FormulaDslParse {
167 response_expr,
168 rhs_terms,
169 })
170}
171
172fn delimiter_balance_error(prefix: &str) -> String {
173 format!("{prefix}: unbalanced parentheses or quotes")
174}
175
176fn validate_balanced_delimiters(input: &str, prefix: &str) -> Result<(), String> {
180 let mut stack = Vec::<char>::new();
181 let mut in_single = false;
182 let mut in_double = false;
183
184 for ch in input.chars() {
185 match ch {
186 '\'' if !in_double => in_single = !in_single,
187 '"' if !in_single => in_double = !in_double,
188 '(' | '[' | '{' if !in_single && !in_double => stack.push(ch),
189 ')' | ']' | '}' if !in_single && !in_double => {
190 let expected = match ch {
191 ')' => '(',
192 ']' => '[',
193 _ => '{',
195 };
196 if stack.pop() != Some(expected) {
197 return Err(FormulaDslError::ParseError {
198 reason: delimiter_balance_error(prefix),
199 }
200 .into());
201 }
202 }
203 _ => {}
204 }
205 }
206
207 if in_single || in_double || !stack.is_empty() {
208 return Err(FormulaDslError::ParseError {
209 reason: delimiter_balance_error(prefix),
210 }
211 .into());
212 }
213 Ok(())
214}
215
216fn extract_rhs_terms(rhs: Pair<'_, Rule>) -> Result<Vec<String>, String> {
217 let mut out = Vec::new();
218 let mut depth = 0_i32;
219 let mut in_single = false;
220 let mut in_double = false;
221 let mut start = 0_usize;
222 let mut last_significant: Option<char> = None;
235 let text = rhs.as_str();
236 let bytes = text.as_bytes();
237 for (idx, &b) in bytes.iter().enumerate() {
238 let ch = b as char;
239 match ch {
240 '\'' if !in_double => in_single = !in_single,
241 '"' if !in_single => in_double = !in_double,
242 '(' | '[' | '{' if !in_single && !in_double => depth += 1,
243 ')' | ']' | '}' if !in_single && !in_double && depth > 0 => depth -= 1,
244 '+' if !in_single
245 && !in_double
246 && depth == 0
247 && !matches!(
248 last_significant,
249 None | Some(':' | '*' | '/' | '^' | '+' | '-')
250 ) =>
251 {
252 let term = text[start..idx].trim();
253 if term.is_empty() {
254 return Err(FormulaDslError::ParseError {
255 reason: "formula RHS contains an empty term".to_string(),
256 }
257 .into());
258 }
259 out.push(term.to_string());
260 start = idx + 1;
261 }
262 _ => {}
263 }
264 if !ch.is_ascii_whitespace() {
265 last_significant = Some(ch);
266 }
267 }
268 if in_single || in_double || depth != 0 {
269 return Err(FormulaDslError::ParseError {
270 reason: "formula RHS has unbalanced quotes or parentheses".to_string(),
271 }
272 .into());
273 }
274 let tail = text[start..].trim();
275 if tail.is_empty() {
276 return Err(FormulaDslError::ParseError {
277 reason: "formula RHS contains an empty term".to_string(),
278 }
279 .into());
280 }
281 out.push(tail.to_string());
282 Ok(out)
283}
284
285type WrAtomList = Vec<String>;
307
308fn expand_wr_term(raw: &str) -> Result<Vec<WrAtomList>, String> {
309 let mut parsed = FormulaParser::parse(Rule::top_expr, raw).map_err(|e| {
310 FormulaDslError::ParseError {
311 reason: format!("invalid term syntax in `{raw}`: {e}"),
312 }
313 .to_string()
314 })?;
315 let top = parsed.next().ok_or_else(|| {
316 FormulaDslError::ParseError {
317 reason: format!("invalid term syntax in `{raw}`: empty parse"),
318 }
319 .to_string()
320 })?;
321 let expr = top
322 .into_inner()
323 .find(|p| p.as_rule() == Rule::expr)
324 .ok_or_else(|| {
325 FormulaDslError::ParseError {
326 reason: format!("invalid term syntax in `{raw}`: missing expr"),
327 }
328 .to_string()
329 })?;
330 let interactions = expand_expr(expr, raw)?;
331 let normalized: Vec<WrAtomList> = interactions
332 .into_iter()
333 .map(normalize_interaction)
334 .collect();
335 let mut seen = std::collections::BTreeSet::<Vec<String>>::new();
338 let mut out = Vec::<WrAtomList>::new();
339 for term in normalized {
340 let key = term.clone();
341 if seen.insert(key) {
342 out.push(term);
343 }
344 }
345 Ok(out)
346}
347
348fn normalize_interaction(mut atoms: WrAtomList) -> WrAtomList {
349 atoms.sort();
350 atoms.dedup();
351 atoms
352}
353
354fn expand_expr(pair: Pair<'_, Rule>, raw: &str) -> Result<Vec<WrAtomList>, String> {
355 match pair.as_rule() {
356 Rule::expr => {
357 let inner = pair.into_inner().next().ok_or_else(|| {
358 FormulaDslError::ParseError {
359 reason: format!("invalid term syntax in `{raw}`: empty expr"),
360 }
361 .to_string()
362 })?;
363 expand_expr(inner, raw)
364 }
365 Rule::sum => {
366 let mut iter = pair.into_inner();
367 let first = iter.next().ok_or_else(|| {
368 FormulaDslError::ParseError {
369 reason: format!("invalid term syntax in `{raw}`: empty sum"),
370 }
371 .to_string()
372 })?;
373 let mut acc = expand_expr(first, raw)?;
374 while let Some(op) = iter.next() {
375 if op.as_rule() != Rule::add_op {
376 return Err(FormulaDslError::ParseError {
377 reason: format!(
378 "invalid term syntax in `{raw}`: expected add operator, got `{:?}`",
379 op.as_rule()
380 ),
381 }
382 .into());
383 }
384 let op_str = op.as_str().trim();
385 let operand = iter.next().ok_or_else(|| {
386 FormulaDslError::ParseError {
387 reason: format!("invalid term syntax in `{raw}`: dangling `{op_str}`"),
388 }
389 .to_string()
390 })?;
391 if op_str == "-" {
392 return Err(FormulaDslError::IncompatibleTerm {
393 reason: format!(
394 "binary `-` is not supported inside a formula term in `{raw}` \
395 (use multiple `+` terms or drop the unwanted predictor explicitly)"
396 ),
397 }
398 .into());
399 }
400 let mut rhs = expand_expr(operand, raw)?;
401 acc.append(&mut rhs);
402 }
403 Ok(acc)
404 }
405 Rule::product => {
406 let mut iter = pair.into_inner();
407 let first = iter.next().ok_or_else(|| {
408 FormulaDslError::ParseError {
409 reason: format!("invalid term syntax in `{raw}`: empty product"),
410 }
411 .to_string()
412 })?;
413 let mut acc = expand_expr(first, raw)?;
414 while let Some(op) = iter.next() {
415 if op.as_rule() != Rule::mul_op {
416 return Err(FormulaDslError::ParseError {
417 reason: format!(
418 "invalid term syntax in `{raw}`: expected `*` or `/`, got `{:?}`",
419 op.as_rule()
420 ),
421 }
422 .into());
423 }
424 let op_str = op.as_str().trim();
425 let operand = iter.next().ok_or_else(|| {
426 FormulaDslError::ParseError {
427 reason: format!("invalid term syntax in `{raw}`: dangling `{op_str}`"),
428 }
429 .to_string()
430 })?;
431 let rhs = expand_expr(operand, raw)?;
432 acc = match op_str {
433 "*" => wr_cross(acc, rhs),
434 "/" => wr_nest(acc, rhs),
435 other => {
436 return Err(FormulaDslError::ParseError {
437 reason: format!(
438 "invalid term syntax in `{raw}`: unrecognized mul operator `{other}`"
439 ),
440 }
441 .into());
442 }
443 };
444 }
445 Ok(acc)
446 }
447 Rule::interact => {
448 let mut iter = pair.into_inner();
449 let first = iter.next().ok_or_else(|| {
450 FormulaDslError::ParseError {
451 reason: format!("invalid term syntax in `{raw}`: empty interact"),
452 }
453 .to_string()
454 })?;
455 let mut acc = expand_expr(first, raw)?;
456 while let Some(op) = iter.next() {
457 if op.as_rule() != Rule::interact_op {
458 return Err(FormulaDslError::ParseError {
459 reason: format!(
460 "invalid term syntax in `{raw}`: expected `:`, got `{:?}`",
461 op.as_rule()
462 ),
463 }
464 .into());
465 }
466 let operand = iter.next().ok_or_else(|| {
467 FormulaDslError::ParseError {
468 reason: format!("invalid term syntax in `{raw}`: dangling `:`"),
469 }
470 .to_string()
471 })?;
472 let rhs = expand_expr(operand, raw)?;
473 acc = wr_interact(acc, rhs, raw)?;
474 }
475 Ok(acc)
476 }
477 Rule::power => {
478 let mut iter = pair.into_inner();
479 let first = iter.next().ok_or_else(|| {
480 FormulaDslError::ParseError {
481 reason: format!("invalid term syntax in `{raw}`: empty power"),
482 }
483 .to_string()
484 })?;
485 let base = expand_expr(first, raw)?;
486 let Some(op) = iter.next() else {
487 return Ok(base);
488 };
489 if op.as_rule() != Rule::pow_op {
490 return Err(FormulaDslError::ParseError {
491 reason: format!(
492 "invalid term syntax in `{raw}`: expected `^`, got `{:?}`",
493 op.as_rule()
494 ),
495 }
496 .into());
497 }
498 let exponent_pair = iter.next().ok_or_else(|| {
499 FormulaDslError::ParseError {
500 reason: format!("invalid term syntax in `{raw}`: dangling `^`"),
501 }
502 .to_string()
503 })?;
504 let exp_text = exponent_pair.as_str().trim();
505 let n: usize = exp_text.parse().map_err(|_| {
506 FormulaDslError::ParseError {
507 reason: format!(
508 "invalid term syntax in `{raw}`: `^` exponent must be a positive integer, got `{exp_text}`"
509 ),
510 }
511 .to_string()
512 })?;
513 if n == 0 {
514 return Err(FormulaDslError::ParseError {
515 reason: format!(
516 "invalid term syntax in `{raw}`: `^0` is not a meaningful formula expansion"
517 ),
518 }
519 .into());
520 }
521 if let Some(extra_op) = iter.next() {
522 let extra = extra_op.as_str().trim();
523 return Err(FormulaDslError::ParseError {
524 reason: format!(
525 "invalid term syntax in `{raw}`: chained `^` operators are not supported; \
526 use one positive integer exponent, got another `{extra}`"
527 ),
528 }
529 .into());
530 }
531 Ok(wr_power(base, n))
532 }
533 Rule::unary => {
534 let unary_start = pair.as_span().start();
535 for inner in pair.into_inner() {
536 if inner.as_rule() == Rule::primary {
537 let prefix_len = inner.as_span().start().saturating_sub(unary_start);
538 if prefix_len > 0 {
539 let prefix = &raw[unary_start..inner.as_span().start()];
540 if prefix.chars().any(|ch| matches!(ch, '+' | '-')) {
541 return Err(FormulaDslError::IncompatibleTerm {
542 reason: format!(
543 "unary `+`/`-` is not supported inside a formula term in `{raw}`"
544 ),
545 }
546 .into());
547 }
548 }
549 return expand_expr(inner, raw);
550 }
551 }
552 Err(FormulaDslError::ParseError {
553 reason: format!("invalid term syntax in `{raw}`: empty unary"),
554 }
555 .into())
556 }
557 Rule::primary => {
558 let span = pair.as_str().trim().to_string();
559 let inner = pair.into_inner().next();
560 match inner {
561 Some(child) if child.as_rule() == Rule::expr => expand_expr(child, raw),
562 Some(child) => Ok(vec![vec![child.as_str().trim().to_string()]]),
563 None => Ok(vec![vec![span]]),
564 }
565 }
566 _ => Err(FormulaDslError::ParseError {
567 reason: format!(
568 "invalid term syntax in `{raw}`: unexpected node `{:?}`",
569 pair.as_rule()
570 ),
571 }
572 .into()),
573 }
574}
575
576fn wr_cross(left: Vec<WrAtomList>, right: Vec<WrAtomList>) -> Vec<WrAtomList> {
577 let mut out = Vec::with_capacity(left.len() + right.len() + left.len() * right.len());
579 out.extend(left.iter().cloned());
580 out.extend(right.iter().cloned());
581 for l in &left {
582 for r in &right {
583 let mut merged: WrAtomList = l.iter().cloned().chain(r.iter().cloned()).collect();
584 merged.sort();
585 merged.dedup();
586 out.push(merged);
587 }
588 }
589 out
590}
591
592fn wr_nest(left: Vec<WrAtomList>, right: Vec<WrAtomList>) -> Vec<WrAtomList> {
593 let mut out = Vec::with_capacity(left.len() + left.len() * right.len());
595 out.extend(left.iter().cloned());
596 for l in &left {
597 for r in &right {
598 let mut merged: WrAtomList = l.iter().cloned().chain(r.iter().cloned()).collect();
599 merged.sort();
600 merged.dedup();
601 out.push(merged);
602 }
603 }
604 out
605}
606
607fn wr_interact(
608 left: Vec<WrAtomList>,
609 right: Vec<WrAtomList>,
610 raw: &str,
611) -> Result<Vec<WrAtomList>, String> {
612 let mut out = Vec::with_capacity(left.len() * right.len());
614 for l in &left {
615 for r in &right {
616 let combined: WrAtomList = l.iter().cloned().chain(r.iter().cloned()).collect();
617 for atom in &combined {
621 if atom.contains('(') {
622 return Err(FormulaDslError::IncompatibleTerm {
623 reason: format!(
624 "interaction operator `:` with function-call atom is not supported in `{raw}`. \
625 Use te(...) for smooth interactions or group()/factor() with a separate \
626 interaction strategy for categorical effects."
627 ),
628 }
629 .into());
630 }
631 }
632 let mut merged = combined;
633 merged.sort();
634 merged.dedup();
635 out.push(merged);
636 }
637 }
638 Ok(out)
639}
640
641fn wr_power(base: Vec<WrAtomList>, n: usize) -> Vec<WrAtomList> {
642 if base.is_empty() {
647 return Vec::new();
648 }
649 let m = base.len();
650 let mut out = Vec::<WrAtomList>::new();
651 let max_size = n.min(m);
652 for size in 1..=max_size {
653 let mut indices: Vec<usize> = (0..size).collect();
655 loop {
656 let mut merged = WrAtomList::new();
657 for &i in &indices {
658 merged.extend(base[i].iter().cloned());
659 }
660 merged.sort();
661 merged.dedup();
662 out.push(merged);
663 let mut k = size;
665 while k > 0 {
666 k -= 1;
667 if indices[k] != k + m - size {
668 indices[k] += 1;
669 for j in (k + 1)..size {
670 indices[j] = indices[j - 1] + 1;
671 }
672 break;
673 }
674 if k == 0 {
675 k = usize::MAX;
676 break;
677 }
678 }
679 if k == usize::MAX {
680 break;
681 }
682 }
683 }
684 out
685}
686
687fn is_exact_ident(raw: &str) -> bool {
688 let mut chars = raw.chars();
689 let Some(first) = chars.next() else {
690 return false;
691 };
692 if !first.is_ascii_alphabetic() && first != '_' {
693 return false;
694 }
695 chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '.')
696}
697
698pub fn parse_function_call(input: &str) -> Result<FunctionCallSpec, String> {
699 validate_balanced_delimiters(input, "invalid function call syntax")?;
700 let mut parsed = FormulaParser::parse(Rule::top_function_call, input).map_err(|e| {
701 FormulaDslError::ParseError {
702 reason: format!("invalid function call syntax: {e}"),
703 }
704 })?;
705 let top = parsed.next().ok_or_else(|| FormulaDslError::ParseError {
706 reason: "invalid function call syntax: empty parse".to_string(),
707 })?;
708 let call = top
709 .into_inner()
710 .find(|p| p.as_rule() == Rule::function_call)
711 .ok_or_else(|| FormulaDslError::ParseError {
712 reason: "invalid function call syntax: missing call".to_string(),
713 })?;
714 parse_call_pair(call)
715}
716
717fn parse_call_pair(call: Pair<'_, Rule>) -> Result<FunctionCallSpec, String> {
718 let mut name: Option<String> = None;
719 let mut args = Vec::<CallArgSpec>::new();
720 for part in call.into_inner() {
721 match part.as_rule() {
722 Rule::ident => {
723 if name.is_none() {
724 name = Some(part.as_str().trim().to_string());
725 }
726 }
727 Rule::arg_list => {
728 for a in part.into_inner() {
729 if a.as_rule() != Rule::arg {
730 continue;
731 }
732 let mut a_inner = a.into_inner();
733 let Some(first) = a_inner.next() else {
734 continue;
735 };
736 match first.as_rule() {
737 Rule::named_arg => {
738 let mut ni = first.into_inner();
739 let key = ni
740 .next()
741 .ok_or_else(|| FormulaDslError::ParseError {
742 reason: "invalid named argument key".to_string(),
743 })?
744 .as_str()
745 .trim()
746 .to_ascii_lowercase();
747 let value = ni
748 .next()
749 .ok_or_else(|| FormulaDslError::ParseError {
750 reason: "invalid named argument value".to_string(),
751 })?
752 .as_str()
753 .trim()
754 .to_string();
755 args.push(CallArgSpec::Named { key, value });
756 }
757 Rule::expr => {
758 args.push(CallArgSpec::Positional(first.as_str().trim().to_string()));
759 }
760 _ => {}
761 }
762 }
763 }
764 _ => {}
765 }
766 }
767 let name = name.ok_or_else(|| FormulaDslError::ParseError {
768 reason: "invalid function call: missing name".to_string(),
769 })?;
770 Ok(FunctionCallSpec { name, args })
771}
772
773#[cfg(test)]
774mod tests {
775 use super::{
776 CallArgSpec, ParsedTerm, parse_formula, parse_formula_dsl, parse_function_call,
777 parse_linkwiggle_formulaspec, parsed_term_column_names, parsed_terms_reference_column,
778 validate_marginal_slope_z_column_exclusion,
779 };
780 use std::collections::{BTreeMap, BTreeSet};
781
782 #[test]
783 fn parsed_term_column_names_includes_by_smooth_grouping_variable() {
784 let parsed =
792 parse_formula("y ~ s(x, by=g) + z + a:b").expect("formula with a by= smooth parses");
793 let mut cols = BTreeSet::<String>::new();
794 parsed_term_column_names(&parsed.terms, &mut cols);
795 for expected in ["x", "g", "z", "a", "b"] {
796 assert!(
797 cols.contains(expected),
798 "parsed_term_column_names dropped '{expected}'; got {cols:?}"
799 );
800 }
801 assert!(
803 !cols.contains("y"),
804 "response leaked into term columns: {cols:?}"
805 );
806 }
807
808 #[test]
809 fn linkwiggle_parser_does_not_bake_in_cubic_only_restriction() {
810 for deg in [2usize, 4, 5, 10] {
820 let mut options = BTreeMap::new();
821 options.insert("degree".to_string(), deg.to_string());
822 options.insert("internal_knots".to_string(), "3".to_string());
823 let raw = format!("timewiggle(degree={deg}, internal_knots=3)");
824 let spec = parse_linkwiggle_formulaspec(&options, &raw)
825 .expect("non-cubic wiggle degree must parse at the shared layer");
826 assert_eq!(
827 spec.degree, deg,
828 "parser must carry the requested degree through verbatim"
829 );
830 }
831
832 let mut zero = BTreeMap::new();
835 zero.insert("degree".to_string(), "0".to_string());
836 zero.insert("internal_knots".to_string(), "3".to_string());
837 let err = parse_linkwiggle_formulaspec(&zero, "linkwiggle(degree=0, internal_knots=3)")
838 .expect_err("degree=0 must be rejected");
839 assert!(
840 err.contains("degree >= 1"),
841 "error should state the positive-degree lower bound, got: {err}"
842 );
843 }
844
845 #[test]
846 fn parses_nested_formula_terms() {
847 let parsed =
848 parse_formula_dsl("log(y) ~ x1 + s(log(x2 + 1), bs=\"tps\", k=10) + te(x3, x4)")
849 .expect("parse");
850 assert_eq!(parsed.response_expr, "log(y)");
851 assert_eq!(parsed.rhs_terms.len(), 3);
852 assert_eq!(parsed.rhs_terms[0], "x1");
853 assert_eq!(parsed.rhs_terms[1], "s(log(x2 + 1), bs=\"tps\", k=10)");
854 assert_eq!(parsed.rhs_terms[2], "te(x3, x4)");
855 }
856
857 #[test]
858 fn parses_cyclic_formula_aliases() {
859 let parsed = parse_formula("y ~ cyclic(theta, period_start=0, period_end=6.283)")
860 .expect("parse cyclic formula");
861 match &parsed.terms[0] {
862 super::ParsedTerm::Smooth { vars, options, .. } => {
863 assert_eq!(vars, &vec!["theta".to_string()]);
864 assert_eq!(options.get("type").map(String::as_str), Some("cyclic"));
865 assert_eq!(options.get("period_start").map(String::as_str), Some("0"));
866 }
867 other => panic!("expected cyclic smooth term, got {other:?}"),
868 }
869 }
870
871 #[test]
872 fn sphere_aliases_all_dispatch_to_intrinsic_s2_basis() {
873 for alias in ["sphere", "sos", "spherical", "s2"] {
880 let parsed = parse_formula(&format!("y ~ {alias}(lat, lon)"))
881 .unwrap_or_else(|e| panic!("parse {alias}: {e}"));
882 match &parsed.terms[0] {
883 super::ParsedTerm::Smooth { vars, options, .. } => {
884 assert_eq!(
885 vars,
886 &vec!["lat".to_string(), "lon".to_string()],
887 "{alias} should keep (lat, lon) as its variables"
888 );
889 assert_eq!(
890 options.get("type").map(String::as_str),
891 Some("sphere"),
892 "{alias} must dispatch to the intrinsic sphere basis (type=sphere)"
893 );
894 }
895 other => panic!("expected sphere smooth term for {alias}, got {other:?}"),
896 }
897 }
898 }
899
900 #[test]
901 fn parses_function_callwithnamed_and_positional_args() {
902 let call = parse_function_call("s(log(x + 1), type=\"duchon\", centers=12)").expect("call");
903 assert_eq!(call.name, "s");
904 assert_eq!(call.args.len(), 3);
905 assert_eq!(
906 call.args[0],
907 CallArgSpec::Positional("log(x + 1)".to_string())
908 );
909 assert_eq!(
910 call.args[1],
911 CallArgSpec::Named {
912 key: "type".to_string(),
913 value: "\"duchon\"".to_string()
914 }
915 );
916 }
917
918 #[test]
919 fn parses_tensor_boundary_list_options() {
920 let call = parse_function_call(
921 "te(day_of_week, hour, boundary=['periodic', 'periodic'], period=[7, 24])",
922 )
923 .expect("call");
924 assert_eq!(call.name, "te");
925 assert_eq!(call.args.len(), 4);
926 assert_eq!(
927 call.args[2],
928 CallArgSpec::Named {
929 key: "boundary".to_string(),
930 value: "['periodic', 'periodic']".to_string(),
931 }
932 );
933 }
934
935 #[test]
936 fn parse_formula_dsl_reports_unbalanced_parentheses() {
937 let err = parse_formula_dsl("y ~ s(x, k=10").expect_err("expected parse failure");
938 assert!(err.contains("unbalanced parentheses"));
939 }
940
941 #[test]
942 fn parse_function_call_reports_unbalanced_parentheses() {
943 let err = parse_function_call("s(x, k=10").expect_err("expected parse failure");
944 assert!(err.contains("unbalanced parentheses"));
945 }
946
947 #[test]
948 fn parse_formula_accepts_tuple_smooth_options() {
949 let parsed = parse_formula("z ~ te(x, y, k=(20, 20))")
950 .expect("tuple-valued smooth option should parse");
951 assert_eq!(parsed.terms.len(), 1);
952
953 let dsl = parse_formula_dsl("z ~ te(x, y, k=(20, 20))")
954 .expect("tuple-valued smooth option should parse in the DSL layer");
955 assert_eq!(dsl.rhs_terms, vec!["te(x, y, k=(20, 20))"]);
956
957 let call = parse_function_call("te(x, y, k=(20, 20))")
958 .expect("tuple-valued smooth option should parse as a function call");
959 assert_eq!(
960 call.args[2],
961 CallArgSpec::Named {
962 key: "k".to_string(),
963 value: "(20, 20)".to_string(),
964 }
965 );
966 }
967
968 #[test]
969 fn parse_formula_rejects_unsupported_top_level_rhs_expressions() {
970 for formula in ["y ~ x - z", "y ~ -x", "y ~ (x)", "y ~ x - 1"] {
979 let err = parse_formula(formula).expect_err("expected formula parse failure");
980 assert!(err.to_string().contains("unsupported top-level RHS term"));
981 }
982 }
983
984 #[test]
990 fn parse_formula_supports_wr_slash_nesting() {
991 let parsed = parse_formula("y ~ x / z").expect("`/` is supported as WR nesting");
992 assert_eq!(parsed.response, "y");
993 assert_eq!(parsed.terms.len(), 2);
994 let names: Vec<String> = parsed
995 .terms
996 .iter()
997 .map(|t| match t {
998 ParsedTerm::Linear { name, .. } => format!("Linear({name})"),
999 ParsedTerm::Interaction { vars } => format!("Interaction({})", vars.join(":")),
1000 other => format!("Other({other:?})"),
1001 })
1002 .collect();
1003 assert_eq!(
1004 names,
1005 vec!["Linear(x)".to_string(), "Interaction(x:z)".to_string()]
1006 );
1007 }
1008
1009 #[test]
1014 fn parse_formula_supports_wr_star_crossing() {
1015 let parsed = parse_formula("y ~ x * z").expect("`*` is supported as WR crossing");
1016 assert_eq!(parsed.response, "y");
1017 assert_eq!(parsed.terms.len(), 3);
1018 let names: Vec<String> = parsed
1019 .terms
1020 .iter()
1021 .map(|t| match t {
1022 ParsedTerm::Linear { name, .. } => format!("Linear({name})"),
1023 ParsedTerm::Interaction { vars } => format!("Interaction({})", vars.join(":")),
1024 other => format!("Other({other:?})"),
1025 })
1026 .collect();
1027 assert_eq!(
1028 names,
1029 vec![
1030 "Linear(x)".to_string(),
1031 "Linear(z)".to_string(),
1032 "Interaction(x:z)".to_string(),
1033 ]
1034 );
1035 }
1036
1037 #[test]
1038 fn parse_formula_rejects_unary_signs_inside_wr_expansion() {
1039 for formula in ["y ~ x:-z", "y ~ a*-b", "y ~ x/-z", "y ~ x:+z"] {
1040 let err = parse_formula(formula)
1041 .expect_err("WR expansion must not silently drop unary signs");
1042 let msg = err.to_string();
1043 assert!(
1044 msg.contains("unary `+`/`-` is not supported"),
1045 "unexpected error for {formula}: {msg}"
1046 );
1047 }
1048 }
1049
1050 #[test]
1051 fn parse_formula_supports_wr_power_crossing() {
1052 let parsed = parse_formula("y ~ (x + z)^2").expect("`^` is supported as WR power");
1053 assert_eq!(parsed.response, "y");
1054 assert_eq!(parsed.terms.len(), 3);
1055 let names: Vec<String> = parsed
1056 .terms
1057 .iter()
1058 .map(|t| match t {
1059 ParsedTerm::Linear { name, .. } => format!("Linear({name})"),
1060 ParsedTerm::Interaction { vars } => format!("Interaction({})", vars.join(":")),
1061 other => format!("Other({other:?})"),
1062 })
1063 .collect();
1064 assert_eq!(
1065 names,
1066 vec![
1067 "Linear(x)".to_string(),
1068 "Linear(z)".to_string(),
1069 "Interaction(x:z)".to_string(),
1070 ]
1071 );
1072 }
1073
1074 #[test]
1075 fn parse_formula_rejects_chained_wr_power() {
1076 let err = parse_formula("y ~ (x + z)^2^3")
1077 .expect_err("chained WR powers must not silently drop later exponents");
1078 let msg = err.to_string();
1079 assert!(
1080 msg.contains("chained `^` operators are not supported"),
1081 "error should explain that chained powers are rejected, got: {msg}"
1082 );
1083 }
1084
1085 #[test]
1086 fn parsed_terms_reference_column_sees_the_by_smooth_variable() {
1087 let parsed = parse_formula("y ~ s(x, by=g)").expect("parse by-smooth");
1093 assert!(
1094 parsed_terms_reference_column(&parsed.terms, "g"),
1095 "s(x, by=g) references column g via options[\"by\"]"
1096 );
1097 assert!(parsed_terms_reference_column(&parsed.terms, "x"));
1098 assert!(!parsed_terms_reference_column(&parsed.terms, "absent"));
1099 }
1100
1101 #[test]
1102 fn marginal_slope_z_column_validator_detects_linear_and_smooth_reuse() {
1103 let main = parse_formula("y ~ x + z").expect("parse main");
1104 let logslope = parse_formula("y ~ s(z, type=duchon, centers=6)").expect("parse logslope");
1105
1106 assert!(parsed_terms_reference_column(&main.terms, "z"));
1107 assert!(parsed_terms_reference_column(&logslope.terms, "z"));
1108
1109 let err = validate_marginal_slope_z_column_exclusion(
1110 &main,
1111 &parse_formula("y ~ 1").expect("parse clean logslope"),
1112 "z",
1113 "bernoulli marginal-slope",
1114 "--logslope-formula",
1115 )
1116 .expect_err("main formula should be rejected");
1117 assert!(err.contains("cannot also appear in the main formula"));
1118
1119 let err = validate_marginal_slope_z_column_exclusion(
1120 &parse_formula("y ~ x").expect("parse clean main"),
1121 &logslope,
1122 "z",
1123 "bernoulli marginal-slope",
1124 "--logslope-formula",
1125 )
1126 .expect_err("logslope formula should be rejected");
1127 assert!(err.contains("cannot also appear in --logslope-formula"));
1128 }
1129
1130 #[test]
1131 fn logslope_surface_declarations_are_additive() {
1132 let parsed = parse_formula("y ~ s(pc1) + logslope(z2, s(pc2)) + logslope(z3, x3)")
1133 .expect("parse additive logslope surfaces");
1134 assert_eq!(parsed.terms.len(), 1);
1135 assert_eq!(parsed.logslope_surfaces.len(), 2);
1136 assert_eq!(parsed.logslope_surfaces[0].z_column, "z2");
1137 assert_eq!(parsed.logslope_surfaces[0].terms.len(), 1);
1138 assert_eq!(parsed.logslope_surfaces[1].z_column, "z3");
1139 assert_eq!(parsed.logslope_surfaces[1].terms.len(), 1);
1140 }
1141
1142 #[test]
1143 fn marginal_slope_z_column_validator_reserves_all_surface_z_columns() {
1144 let main = parse_formula("y ~ x").expect("parse main");
1145 let logslope = parse_formula("y ~ s(pc1) + logslope(z2, s(z3)) + logslope(z3, x)")
1146 .expect("parse logslope surfaces");
1147 let err = validate_marginal_slope_z_column_exclusion(
1148 &main,
1149 &logslope,
1150 "z1",
1151 "bernoulli marginal-slope",
1152 "--logslope-formula",
1153 )
1154 .expect_err("surface formula should reject another reserved z coordinate");
1155 assert!(err.contains("reserves z column 'z3'"));
1156 }
1157}
1158
1159#[derive(Clone, Debug)]
1164pub struct LinkWiggleFormulaSpec {
1165 pub degree: usize,
1166 pub num_internal_knots: usize,
1167 pub penalty_orders: Vec<usize>,
1168 pub double_penalty: bool,
1169}
1170
1171pub fn default_linkwiggle_formulaspec() -> LinkWiggleFormulaSpec {
1172 let cfg = WigglePenaltyConfig::cubic_triple_operator_default();
1173 LinkWiggleFormulaSpec {
1174 degree: cfg.degree,
1175 num_internal_knots: cfg.num_internal_knots,
1176 penalty_orders: cfg.penalty_orders,
1177 double_penalty: cfg.double_penalty,
1178 }
1179}
1180
1181#[derive(Clone, Debug)]
1182pub struct LinkFormulaSpec {
1183 pub link: String,
1184 pub mixture_rho: Option<String>,
1185 pub sas_init: Option<String>,
1186 pub beta_logistic_init: Option<String>,
1187}
1188
1189#[derive(Clone, Debug)]
1190pub struct SurvivalFormulaSpec {
1191 pub spec: Option<String>,
1192 pub survival_distribution: Option<String>,
1193}
1194
1195#[derive(Clone, Debug)]
1196pub struct ParsedFormula {
1197 pub response: String,
1198 pub terms: Vec<ParsedTerm>,
1199 pub logslope_surfaces: Vec<LogSlopeSurfaceSpec>,
1200 pub linkwiggle: Option<LinkWiggleFormulaSpec>,
1201 pub timewiggle: Option<LinkWiggleFormulaSpec>,
1202 pub linkspec: Option<LinkFormulaSpec>,
1203 pub survivalspec: Option<SurvivalFormulaSpec>,
1204}
1205
1206#[derive(Clone, Debug)]
1207pub struct LogSlopeSurfaceSpec {
1208 pub z_column: String,
1209 pub terms: Vec<ParsedTerm>,
1210}
1211
1212pub fn marginal_slope_logslope_surfaces(
1213 logslope_formula: &ParsedFormula,
1214 default_z_column: &str,
1215) -> Result<Vec<LogSlopeSurfaceSpec>, String> {
1216 let mut surfaces = Vec::new();
1217 if !logslope_formula.terms.is_empty() {
1218 surfaces.push(LogSlopeSurfaceSpec {
1219 z_column: default_z_column.to_string(),
1220 terms: logslope_formula.terms.clone(),
1221 });
1222 }
1223 surfaces.extend(logslope_formula.logslope_surfaces.clone());
1224 if surfaces.is_empty() {
1225 surfaces.push(LogSlopeSurfaceSpec {
1226 z_column: default_z_column.to_string(),
1227 terms: Vec::new(),
1228 });
1229 }
1230 let mut seen = std::collections::BTreeSet::<String>::new();
1231 for surface in &surfaces {
1232 if !seen.insert(surface.z_column.clone()) {
1233 return Err(FormulaDslError::IncompatibleTerm {
1234 reason: format!(
1235 "logslope formula declares z column '{}' more than once; each z coordinate needs exactly one log-slope surface",
1236 surface.z_column
1237 ),
1238 }
1239 .into());
1240 }
1241 }
1242 Ok(surfaces)
1243}
1244
1245#[derive(Clone, Debug)]
1246pub enum ParsedTerm {
1247 Linear {
1248 name: String,
1249 explicit: bool,
1250 coefficient_min: Option<f64>,
1251 coefficient_max: Option<f64>,
1252 },
1253 BoundedLinear {
1254 name: String,
1255 min: f64,
1256 max: f64,
1257 prior: BoundedCoefficientPriorSpec,
1258 },
1259 RandomEffect {
1260 name: String,
1261 },
1262 Smooth {
1263 label: String,
1264 vars: Vec<String>,
1265 kind: SmoothKind,
1266 options: BTreeMap<String, String>,
1267 },
1268 LinkWiggle {
1269 options: BTreeMap<String, String>,
1270 },
1271 TimeWiggle {
1272 options: BTreeMap<String, String>,
1273 },
1274 LinkConfig {
1275 options: BTreeMap<String, String>,
1276 },
1277 SurvivalConfig {
1278 options: BTreeMap<String, String>,
1279 },
1280 LogSlopeSurface {
1281 z_column: String,
1282 terms: Vec<ParsedTerm>,
1283 },
1284 Interaction {
1292 vars: Vec<String>,
1293 },
1294}
1295
1296pub fn parsed_term_column_names(
1305 terms: &[ParsedTerm],
1306 out: &mut std::collections::BTreeSet<String>,
1307) {
1308 for term in terms {
1309 match term {
1310 ParsedTerm::Linear { name, .. }
1311 | ParsedTerm::BoundedLinear { name, .. }
1312 | ParsedTerm::RandomEffect { name } => {
1313 out.insert(name.clone());
1314 }
1315 ParsedTerm::Smooth { vars, options, .. } => {
1316 out.extend(vars.iter().cloned());
1317 if let Some(by) = options.get("by") {
1318 out.insert(by.clone());
1319 }
1320 }
1321 ParsedTerm::Interaction { vars } => {
1322 out.extend(vars.iter().cloned());
1323 }
1324 ParsedTerm::LinkWiggle { .. }
1325 | ParsedTerm::TimeWiggle { .. }
1326 | ParsedTerm::LinkConfig { .. }
1327 | ParsedTerm::SurvivalConfig { .. } => {}
1328 ParsedTerm::LogSlopeSurface { z_column, terms } => {
1329 out.insert(z_column.clone());
1330 parsed_term_column_names(terms, out);
1331 }
1332 }
1333 }
1334}
1335
1336pub fn parsed_terms_reference_column(terms: &[ParsedTerm], column_name: &str) -> bool {
1337 terms.iter().any(|term| match term {
1338 ParsedTerm::Linear { name, .. }
1339 | ParsedTerm::BoundedLinear { name, .. }
1340 | ParsedTerm::RandomEffect { name } => name == column_name,
1341 ParsedTerm::Smooth { vars, options, .. } => {
1342 vars.iter().any(|var| var == column_name)
1343 || options.get("by").is_some_and(|by| by == column_name)
1344 }
1345 ParsedTerm::Interaction { vars } => vars.iter().any(|var| var == column_name),
1346 ParsedTerm::LinkWiggle { .. }
1347 | ParsedTerm::TimeWiggle { .. }
1348 | ParsedTerm::LinkConfig { .. }
1349 | ParsedTerm::SurvivalConfig { .. } => false,
1350 ParsedTerm::LogSlopeSurface { z_column, terms } => {
1351 z_column == column_name || parsed_terms_reference_column(terms, column_name)
1352 }
1353 })
1354}
1355
1356pub fn validate_marginal_slope_z_column_exclusion(
1357 main_formula: &ParsedFormula,
1358 logslope_formula: &ParsedFormula,
1359 z_column: &str,
1360 context: &str,
1361 logslope_label: &str,
1362) -> Result<(), String> {
1363 let surfaces = marginal_slope_logslope_surfaces(logslope_formula, z_column)?;
1364 for z_column in surfaces.iter().map(|surface| surface.z_column.as_str()) {
1365 if parsed_terms_reference_column(&main_formula.terms, z_column) {
1366 return Err(FormulaDslError::IncompatibleTerm {
1367 reason: format!(
1368 "{context} reserves z column '{z_column}' as the auxiliary latent score; it cannot also appear in the main formula"
1369 ),
1370 }
1371 .into());
1372 }
1373 }
1374 for reserved in surfaces.iter().map(|surface| surface.z_column.as_str()) {
1375 if parsed_terms_reference_column(&logslope_formula.terms, reserved) {
1376 return Err(FormulaDslError::IncompatibleTerm {
1377 reason: format!(
1378 "{context} reserves z column '{reserved}' as the auxiliary latent score; it cannot also appear in {logslope_label}"
1379 ),
1380 }
1381 .into());
1382 }
1383 for surface in &surfaces {
1384 if parsed_terms_reference_column(&surface.terms, reserved) {
1385 return Err(FormulaDslError::IncompatibleTerm {
1386 reason: format!(
1387 "{context} reserves z column '{reserved}' as an auxiliary latent score; it cannot also appear in {logslope_label}"
1388 ),
1389 }
1390 .into());
1391 }
1392 }
1393 }
1394 Ok(())
1395}
1396
1397#[derive(Clone, Copy, Debug)]
1398pub enum SmoothKind {
1399 S,
1400 Te,
1401 T2,
1405 Ti,
1411}
1412
1413#[derive(Clone, Copy, Debug)]
1414pub enum LinkMode {
1415 Strict,
1416 Flexible,
1417}
1418
1419#[derive(Clone, Debug)]
1420pub struct LinkChoice {
1421 pub mode: LinkMode,
1422 pub link: LinkFunction,
1423 pub mixture_components: Option<Vec<LinkComponent>>,
1424}
1425
1426pub fn effectivelinkwiggle_formulaspec(
1431 formula_linkwiggle: Option<&LinkWiggleFormulaSpec>,
1432 link_choice: Option<&LinkChoice>,
1433) -> Option<LinkWiggleFormulaSpec> {
1434 formula_linkwiggle.cloned().or_else(|| {
1435 link_choice.and_then(|choice| {
1436 if matches!(choice.mode, LinkMode::Flexible) {
1437 Some(default_linkwiggle_formulaspec())
1438 } else {
1439 None
1440 }
1441 })
1442 })
1443}
1444
1445pub const fn linkname_supports_joint_wiggle(link: LinkFunction) -> bool {
1446 !matches!(link, LinkFunction::Sas | LinkFunction::BetaLogistic)
1447}
1448
1449pub const fn linkchoice_supports_joint_wiggle(choice: &LinkChoice) -> bool {
1450 match &choice.mixture_components {
1451 None => linkname_supports_joint_wiggle(choice.link),
1452 Some(_) => false,
1453 }
1454}
1455
1456pub fn require_linkchoice_supports_joint_wiggle(
1457 choice: &LinkChoice,
1458 context: &str,
1459) -> Result<(), String> {
1460 if linkchoice_supports_joint_wiggle(choice) {
1461 Ok(())
1462 } else {
1463 Err(joint_wiggle_unsupported_link_message(context))
1464 }
1465}
1466
1467pub const fn likelihood_spec_supports_joint_wiggle(likelihood: &LikelihoodSpec) -> bool {
1468 inverse_link_supports_joint_wiggle(&likelihood.link)
1469}
1470
1471pub fn require_likelihood_spec_supports_joint_wiggle(
1472 likelihood: &LikelihoodSpec,
1473 context: &str,
1474) -> Result<(), String> {
1475 if likelihood_spec_supports_joint_wiggle(likelihood) {
1476 Ok(())
1477 } else {
1478 Err(joint_wiggle_unsupported_link_message(context))
1479 }
1480}
1481
1482pub const fn inverse_link_supports_joint_wiggle(link: &InverseLink) -> bool {
1483 matches!(
1484 link,
1485 InverseLink::Standard(StandardLink::Identity)
1486 | InverseLink::Standard(StandardLink::Log)
1487 | InverseLink::Standard(StandardLink::Logit)
1488 | InverseLink::Standard(StandardLink::Probit)
1489 | InverseLink::Standard(StandardLink::CLogLog)
1490 )
1491}
1492
1493pub fn require_inverse_link_supports_joint_wiggle(
1494 link: &InverseLink,
1495 context: &str,
1496) -> Result<(), String> {
1497 if inverse_link_supports_joint_wiggle(link) {
1498 Ok(())
1499 } else {
1500 Err(joint_wiggle_unsupported_link_message(context))
1501 }
1502}
1503
1504pub const fn binomial_inverse_link_supports_joint_wiggle(link: &InverseLink) -> bool {
1505 matches!(
1506 link,
1507 InverseLink::Standard(StandardLink::Logit)
1508 | InverseLink::Standard(StandardLink::Probit)
1509 | InverseLink::Standard(StandardLink::CLogLog)
1510 )
1511}
1512
1513pub fn require_binomial_inverse_link_supports_joint_wiggle(
1514 link: &InverseLink,
1515 context: &str,
1516) -> Result<(), String> {
1517 if binomial_inverse_link_supports_joint_wiggle(link) {
1518 Ok(())
1519 } else {
1520 Err(FormulaDslError::IncompatibleTerm {
1521 reason: format!(
1522 "{context} does not support identity, log, latent-cloglog, SAS, BetaLogistic, or Mixture links; wiggle is only available for jointly fitted logit/probit/cloglog links"
1523 ),
1524 }
1525 .into())
1526 }
1527}
1528
1529pub fn joint_wiggle_unsupported_link_message(context: &str) -> String {
1530 format!(
1531 "{context} does not support latent-cloglog, SAS, BetaLogistic, or Mixture links; wiggle is only available for jointly fitted standard links"
1532 )
1533}
1534
1535pub fn option_usize(map: &BTreeMap<String, String>, key: &str) -> Option<usize> {
1540 map.get(key).and_then(|v| v.parse::<usize>().ok())
1541}
1542
1543fn validate_known_term_options(
1550 term_name: &str,
1551 options: &BTreeMap<String, String>,
1552 known: &[&str],
1553 raw: &str,
1554) -> Result<(), String> {
1555 let known_set: std::collections::BTreeSet<&&str> = known.iter().collect();
1556 for key in options.keys() {
1557 if !known_set.contains(&key.as_str()) {
1558 let known_sorted = {
1559 let mut v = known.to_vec();
1560 v.sort_unstable();
1561 v.join(", ")
1562 };
1563 let known_hint = if known.is_empty() {
1564 "no options".to_string()
1565 } else {
1566 format!("[{known_sorted}]")
1567 };
1568 return Err(FormulaDslError::InvalidArgument {
1569 reason: format!(
1570 "{term_name}() does not accept option `{key}` (in `{raw}`); known options: {known_hint}"
1571 ),
1572 }
1573 .into());
1574 }
1575 }
1576 Ok(())
1577}
1578
1579pub fn option_usize_any(map: &BTreeMap<String, String>, keys: &[&str]) -> Option<usize> {
1580 for key in keys {
1581 if let Some(v) = option_usize(map, key) {
1582 return Some(v);
1583 }
1584 }
1585 None
1586}
1587
1588pub fn option_usize_strict(
1595 map: &BTreeMap<String, String>,
1596 key: &str,
1597) -> Result<Option<usize>, String> {
1598 match map.get(key) {
1599 None => Ok(None),
1600 Some(raw) => raw.parse::<usize>().map(Some).map_err(|err| {
1601 FormulaDslError::InvalidArgument {
1602 reason: format!(
1603 "option `{key}={raw}` is not a non-negative integer; \
1604 expected a whole number >= 0: {err}"
1605 ),
1606 }
1607 .into()
1608 }),
1609 }
1610}
1611
1612pub fn option_usize_any_strict(
1615 map: &BTreeMap<String, String>,
1616 keys: &[&str],
1617) -> Result<Option<usize>, String> {
1618 for key in keys {
1619 if let Some(v) = option_usize_strict(map, key)? {
1620 return Ok(Some(v));
1621 }
1622 }
1623 Ok(None)
1624}
1625
1626pub fn option_f64(map: &BTreeMap<String, String>, key: &str) -> Option<f64> {
1627 map.get(key).and_then(|v| v.parse::<f64>().ok())
1628}
1629
1630pub fn option_f64_strict(map: &BTreeMap<String, String>, key: &str) -> Result<Option<f64>, String> {
1634 match map.get(key) {
1635 None => Ok(None),
1636 Some(raw) => match raw.parse::<f64>() {
1637 Ok(v) if v.is_finite() => Ok(Some(v)),
1638 Ok(v) => Err(FormulaDslError::InvalidArgument {
1639 reason: format!("option `{key}={raw}` parses as {v} which is not a finite number"),
1640 }
1641 .into()),
1642 Err(err) => Err(FormulaDslError::InvalidArgument {
1643 reason: format!(
1644 "option `{key}={raw}` is not a valid number; expected a finite decimal: {err}"
1645 ),
1646 }
1647 .into()),
1648 },
1649 }
1650}
1651
1652pub fn option_bool(map: &BTreeMap<String, String>, key: &str) -> Option<bool> {
1653 map.get(key)
1654 .and_then(|v| match v.trim().to_ascii_lowercase().as_str() {
1655 "true" | "1" | "yes" | "y" => Some(true),
1656 "false" | "0" | "no" | "n" => Some(false),
1657 _ => None,
1658 })
1659}
1660
1661pub fn option_bool_strict(
1667 map: &BTreeMap<String, String>,
1668 key: &str,
1669) -> Result<Option<bool>, String> {
1670 match map.get(key) {
1671 None => Ok(None),
1672 Some(raw) => match raw.trim().to_ascii_lowercase().as_str() {
1673 "true" | "1" | "yes" | "y" => Ok(Some(true)),
1674 "false" | "0" | "no" | "n" => Ok(Some(false)),
1675 _ => Err(FormulaDslError::InvalidArgument {
1676 reason: format!(
1677 "option `{key}={raw}` is not a boolean; \
1678 expected one of true/false/yes/no/1/0"
1679 ),
1680 }
1681 .into()),
1682 },
1683 }
1684}
1685
1686pub fn strip_quotes(v: &str) -> &str {
1687 let b = v.as_bytes();
1688 if b.len() >= 2
1689 && ((b[0] == b'\'' && b[b.len() - 1] == b'\'') || (b[0] == b'"' && b[b.len() - 1] == b'"'))
1690 {
1691 &v[1..v.len() - 1]
1692 } else {
1693 v
1694 }
1695}
1696
1697fn parse_linear_constraint_bounds(
1702 options: &BTreeMap<String, String>,
1703 raw: &str,
1704) -> Result<(Option<f64>, Option<f64>), String> {
1705 let min = parse_optional_f64_option_alias(options, &["min", "lower"], raw, "linear")?;
1706 let max = parse_optional_f64_option_alias(options, &["max", "upper"], raw, "linear")?;
1707 if let (Some(min), Some(max)) = (min, max)
1708 && (!min.is_finite() || !max.is_finite() || min > max)
1709 {
1710 return Err(FormulaDslError::InvalidArgument {
1711 reason: format!(
1712 "linear coefficient constraints require finite min <= max, got min={min}, max={max}: {raw}"
1713 ),
1714 }
1715 .into());
1716 }
1717 Ok((min, max))
1718}
1719
1720fn parse_required_f64_option(
1721 options: &BTreeMap<String, String>,
1722 key: &str,
1723 raw: &str,
1724) -> Result<f64, String> {
1725 let value = options
1726 .get(key)
1727 .ok_or_else(|| FormulaDslError::MalformedConfig {
1728 reason: format!("bounded() is missing required '{key}' argument: {raw}"),
1729 })?;
1730 value.parse::<f64>().map_err(|err| {
1731 FormulaDslError::InvalidArgument {
1732 reason: format!(
1733 "bounded() argument '{key}' must be a finite number, got '{}': {err}: {raw}",
1734 value
1735 ),
1736 }
1737 .into()
1738 })
1739}
1740
1741fn parse_optional_f64_option(
1742 options: &BTreeMap<String, String>,
1743 key: &str,
1744 raw: &str,
1745) -> Result<Option<f64>, String> {
1746 match options.get(key) {
1747 Some(value) => value.parse::<f64>().map(Some).map_err(|err| {
1748 FormulaDslError::InvalidArgument {
1749 reason: format!(
1750 "bounded() argument '{key}' must be a finite number, got '{}': {err}: {raw}",
1751 value
1752 ),
1753 }
1754 .into()
1755 }),
1756 None => Ok(None),
1757 }
1758}
1759
1760fn parse_optional_f64_option_alias(
1761 options: &BTreeMap<String, String>,
1762 keys: &[&str],
1763 raw: &str,
1764 fn_label: &str,
1765) -> Result<Option<f64>, String> {
1766 let mut found: Option<(&str, f64)> = None;
1767 for key in keys {
1768 if let Some(value) = options.get(*key) {
1769 let parsed = value
1770 .parse::<f64>()
1771 .map_err(|err| FormulaDslError::InvalidArgument {
1772 reason: format!(
1773 "{fn_label}() argument '{key}' must be a finite number, got '{}': {err}: {raw}",
1774 value
1775 ),
1776 })?;
1777 if found.is_some() {
1778 return Err(FormulaDslError::IncompatibleTerm {
1779 reason: format!(
1780 "{fn_label}() cannot specify both '{}' and '{}': {raw}",
1781 found.expect("present").0,
1782 key
1783 ),
1784 }
1785 .into());
1786 }
1787 found = Some((key, parsed));
1788 }
1789 }
1790 Ok(found.map(|(_, v)| v))
1791}
1792
1793fn parse_linkwiggle_penalty_orders(raw: Option<&str>) -> Result<Vec<usize>, String> {
1794 let Some(raw) = raw.map(str::trim) else {
1795 return Ok(WigglePenaltyConfig::cubic_triple_operator_default().penalty_orders);
1796 };
1797 if raw.is_empty() {
1798 return Ok(WigglePenaltyConfig::cubic_triple_operator_default().penalty_orders);
1799 }
1800 let mut out = Vec::<usize>::new();
1801 for token in raw.split(',') {
1802 let t = token.trim().to_ascii_lowercase();
1803 if t.is_empty() {
1804 continue;
1805 }
1806 match t.as_str() {
1807 "all" => {
1808 out.extend([1, 2, 3]);
1809 }
1810 "slope" | "1" => out.push(1),
1811 "curvature" | "2" => out.push(2),
1812 "curvature-change" | "curvature_change" | "3" => out.push(3),
1813 _ => {
1814 return Err(FormulaDslError::InvalidArgument {
1815 reason: format!(
1816 "invalid linkwiggle penalty_order '{t}'; use all|slope|curvature|curvature-change or 1/2/3"
1817 ),
1818 }
1819 .into());
1820 }
1821 }
1822 }
1823 if out.is_empty() {
1824 out.extend(WigglePenaltyConfig::cubic_triple_operator_default().penalty_orders);
1825 }
1826 out.sort_unstable();
1827 out.dedup();
1828 Ok(out)
1829}
1830
1831pub fn parse_linkwiggle_formulaspec(
1832 options: &BTreeMap<String, String>,
1833 raw: &str,
1834) -> Result<LinkWiggleFormulaSpec, String> {
1835 let allowed = [
1836 "degree",
1837 "internal_knots",
1838 "penalty_order",
1839 "double_penalty",
1840 ];
1841 let unknown = options
1842 .keys()
1843 .filter(|key| !allowed.contains(&key.as_str()))
1844 .cloned()
1845 .collect::<Vec<_>>();
1846 let term_name = raw.split('(').next().unwrap_or("linkwiggle");
1847 if !unknown.is_empty() {
1848 return Err(FormulaDslError::InvalidArgument {
1849 reason: format!(
1850 "{}() does not support option(s) {}: {raw}",
1851 term_name,
1852 unknown.join(", ")
1853 ),
1854 }
1855 .into());
1856 }
1857 let defaults = WigglePenaltyConfig::cubic_triple_operator_default();
1858 let degree = option_usize_strict(options, "degree")?.unwrap_or(defaults.degree);
1874 if degree < 1 {
1875 return Err(FormulaDslError::InvalidArgument {
1876 reason: format!("{term_name}() requires degree >= 1: {raw}"),
1877 }
1878 .into());
1879 }
1880 let num_internal_knots =
1881 option_usize_strict(options, "internal_knots")?.unwrap_or(defaults.num_internal_knots);
1882 if num_internal_knots == 0 {
1883 return Err(FormulaDslError::InvalidArgument {
1884 reason: format!("{term_name}() requires internal_knots > 0: {raw}"),
1885 }
1886 .into());
1887 }
1888 let penalty_orders =
1889 parse_linkwiggle_penalty_orders(options.get("penalty_order").map(String::as_str))?;
1890 let double_penalty =
1891 option_bool_strict(options, "double_penalty")?.unwrap_or(defaults.double_penalty);
1892 Ok(LinkWiggleFormulaSpec {
1893 degree,
1894 num_internal_knots,
1895 penalty_orders,
1896 double_penalty,
1897 })
1898}
1899
1900fn parse_link_formulaspec(
1901 options: &BTreeMap<String, String>,
1902 raw: &str,
1903) -> Result<LinkFormulaSpec, String> {
1904 let link = options
1905 .get("type")
1906 .map(|s| s.trim().to_string())
1907 .ok_or_else(|| FormulaDslError::MalformedConfig {
1908 reason: format!("link() requires type=<link-name>: {raw}"),
1909 })?;
1910 if link.is_empty() {
1911 return Err(FormulaDslError::MalformedConfig {
1912 reason: format!("link() requires a non-empty type: {raw}"),
1913 }
1914 .into());
1915 }
1916 let mixture_rho = options.get("rho").map(|s| s.trim().to_string());
1917 let sas_init = options.get("sas_init").map(|s| s.trim().to_string());
1918 let beta_logistic_init = options
1919 .get("beta_logistic_init")
1920 .map(|s| s.trim().to_string());
1921 Ok(LinkFormulaSpec {
1922 link,
1923 mixture_rho,
1924 sas_init,
1925 beta_logistic_init,
1926 })
1927}
1928
1929fn parse_survival_formulaspec(
1930 options: &BTreeMap<String, String>,
1931 raw: &str,
1932) -> Result<SurvivalFormulaSpec, String> {
1933 if options.is_empty() {
1934 return Err(FormulaDslError::MalformedConfig {
1935 reason: format!(
1936 "survmodel() requires at least one named option (e.g., spec=..., distribution=...): {raw}"
1937 ),
1938 }
1939 .into());
1940 }
1941 Ok(SurvivalFormulaSpec {
1942 spec: options.get("spec").map(|s| s.trim().to_string()),
1943 survival_distribution: options.get("distribution").map(|s| s.trim().to_string()),
1944 })
1945}
1946
1947fn parse_bounded_priorspec(
1948 options: &BTreeMap<String, String>,
1949 min: f64,
1950 max: f64,
1951 raw: &str,
1952) -> Result<BoundedCoefficientPriorSpec, String> {
1953 let prior_mode = options.get("prior").map(|s| s.to_ascii_lowercase());
1954 let pull = options.get("pull").map(|s| s.to_ascii_lowercase());
1955 let target = parse_optional_f64_option(options, "target", raw)?;
1956 let strength = parse_optional_f64_option(options, "strength", raw)?;
1957
1958 let target_mode = target.is_some() || strength.is_some();
1959 if prior_mode.is_some() && pull.is_some() {
1960 return Err(FormulaDslError::IncompatibleTerm {
1961 reason: format!("bounded() cannot combine prior=... with pull=...: {raw}"),
1962 }
1963 .into());
1964 }
1965 if prior_mode.is_some() && target_mode {
1966 return Err(FormulaDslError::IncompatibleTerm {
1967 reason: format!("bounded() cannot combine prior=... with target/strength: {raw}"),
1968 }
1969 .into());
1970 }
1971 if pull.is_some() && target_mode {
1972 return Err(FormulaDslError::IncompatibleTerm {
1973 reason: format!("bounded() cannot combine pull=... with target/strength: {raw}"),
1974 }
1975 .into());
1976 }
1977
1978 if let Some(priorname) = prior_mode {
1979 return match priorname.as_str() {
1980 "none" => Ok(BoundedCoefficientPriorSpec::None),
1981 "uniform" | "log-jacobian" | "log_jacobian" | "jacobian" => {
1982 Ok(BoundedCoefficientPriorSpec::Uniform)
1983 }
1984 "center" => Ok(BoundedCoefficientPriorSpec::Beta { a: 2.0, b: 2.0 }),
1985 _ => Err(FormulaDslError::InvalidArgument {
1986 reason: format!(
1987 "bounded() prior must currently be one of none|uniform|log-jacobian|center, got '{}': {raw}",
1988 priorname
1989 ),
1990 }
1991 .into()),
1992 };
1993 }
1994
1995 if let Some(pull_mode) = pull {
1996 return match pull_mode.as_str() {
1997 "uniform" | "log-jacobian" | "log_jacobian" | "jacobian" => {
1998 Ok(BoundedCoefficientPriorSpec::Uniform)
1999 }
2000 "center" => Ok(BoundedCoefficientPriorSpec::Beta { a: 2.0, b: 2.0 }),
2001 _ => Err(FormulaDslError::InvalidArgument {
2002 reason: format!(
2003 "bounded() pull must currently be 'uniform'/'log-jacobian' or 'center', got '{}': {raw}",
2004 pull_mode
2005 ),
2006 }
2007 .into()),
2008 };
2009 }
2010
2011 if target_mode {
2012 let targetvalue = target.ok_or_else(|| FormulaDslError::MalformedConfig {
2013 reason: format!("bounded() target is required with strength: {raw}"),
2014 })?;
2015 let strengthvalue = strength.ok_or_else(|| FormulaDslError::MalformedConfig {
2016 reason: format!("bounded() strength is required with target: {raw}"),
2017 })?;
2018 if !(min < targetvalue && targetvalue < max) {
2019 return Err(FormulaDslError::InvalidArgument {
2020 reason: format!("bounded() target must lie strictly inside ({min}, {max}): {raw}"),
2021 }
2022 .into());
2023 }
2024 if !strengthvalue.is_finite() || strengthvalue <= 0.0 {
2025 return Err(FormulaDslError::InvalidArgument {
2026 reason: format!("bounded() strength must be finite and > 0: {raw}"),
2027 }
2028 .into());
2029 }
2030 let z = (targetvalue - min) / (max - min);
2031 let a = 1.0 + strengthvalue * z;
2032 let b = 1.0 + strengthvalue * (1.0 - z);
2033 return Ok(BoundedCoefficientPriorSpec::Beta { a, b });
2034 }
2035
2036 Ok(BoundedCoefficientPriorSpec::None)
2037}
2038
2039pub fn formula_rhs_text(formula: &str) -> Result<String, String> {
2044 let parsed = parse_formula_dsl(formula)?;
2045 if parsed.rhs_terms.is_empty() {
2046 return Err(FormulaDslError::ParseError {
2047 reason: "formula right-hand side cannot be empty".to_string(),
2048 }
2049 .into());
2050 }
2051 Ok(parsed.rhs_terms.join(" + "))
2052}
2053
2054pub fn parse_surv_response(
2061 lhs: &str,
2062) -> Result<Option<(Option<String>, String, String)>, FormulaDslError> {
2063 let trimmed = lhs.trim();
2064 let call = match parse_function_call(trimmed) {
2065 Ok(call) => call,
2066 Err(_) => return Ok(None),
2067 };
2068 if !call.name.eq_ignore_ascii_case("surv") {
2069 return Ok(None);
2070 }
2071 let vars = call
2072 .args
2073 .iter()
2074 .filter_map(|arg| match arg {
2075 CallArgSpec::Positional(v) => Some(v.trim().to_string()),
2076 CallArgSpec::Named { .. } => None,
2077 })
2078 .filter(|s| !s.is_empty())
2079 .collect::<Vec<_>>();
2080 match vars.len() {
2081 2 => Ok(Some((None, vars[0].clone(), vars[1].clone()))),
2085 3 => Ok(Some((
2086 Some(vars[0].clone()),
2087 vars[1].clone(),
2088 vars[2].clone(),
2089 ))),
2090 n => Err(FormulaDslError::InvalidArgument {
2091 reason: format!(
2092 "Surv(...) expects either Surv(time, event) (right-censored) or \
2093 Surv(entry, exit, event) (left-truncated); got {n} columns"
2094 ),
2095 }),
2096 }
2097}
2098
2099pub fn parse_surv_interval_response(
2114 lhs: &str,
2115) -> Result<Option<(String, String, String)>, FormulaDslError> {
2116 let trimmed = lhs.trim();
2117 let call = match parse_function_call(trimmed) {
2118 Ok(call) => call,
2119 Err(_) => return Ok(None),
2120 };
2121 if !call.name.eq_ignore_ascii_case("survinterval") {
2122 return Ok(None);
2123 }
2124 let vars = call
2125 .args
2126 .iter()
2127 .filter_map(|arg| match arg {
2128 CallArgSpec::Positional(v) => Some(v.trim().to_string()),
2129 CallArgSpec::Named { .. } => None,
2130 })
2131 .filter(|s| !s.is_empty())
2132 .collect::<Vec<_>>();
2133 match vars.len() {
2134 3 => Ok(Some((vars[0].clone(), vars[1].clone(), vars[2].clone()))),
2135 n => Err(FormulaDslError::InvalidArgument {
2136 reason: format!(
2137 "SurvInterval(...) expects SurvInterval(L, R, event) (interval-censored, the \
2138 observed bracket T ∈ (L, R]); got {n} columns"
2139 ),
2140 }),
2141 }
2142}
2143
2144fn top_level_formula_separator(input: &str) -> Result<Option<usize>, String> {
2145 let mut depth = 0_i32;
2146 let mut in_single = false;
2147 let mut in_double = false;
2148
2149 for (idx, ch) in input.char_indices() {
2150 match ch {
2151 '\'' if !in_double => in_single = !in_single,
2152 '"' if !in_single => in_double = !in_double,
2153 '(' | '[' | '{' if !in_single && !in_double => depth += 1,
2154 ')' | ']' | '}' if !in_single && !in_double && depth > 0 => depth -= 1,
2155 '~' if !in_single && !in_double && depth == 0 => return Ok(Some(idx)),
2156 _ => {}
2157 }
2158 }
2159
2160 if in_single || in_double || depth != 0 {
2161 return Err(FormulaDslError::ParseError {
2162 reason: "invalid auxiliary formula syntax: unbalanced parentheses or quotes"
2163 .to_string(),
2164 }
2165 .into());
2166 }
2167 Ok(None)
2168}
2169
2170pub fn parse_matching_auxiliary_formula(
2171 formula: &str,
2172 response: &str,
2173 flag_name: &str,
2174) -> Result<(String, ParsedFormula), FormulaDslError> {
2175 let rhs = formula.trim();
2176 if top_level_formula_separator(rhs)?.is_some() {
2177 return Err(FormulaDslError::InvalidArgument {
2178 reason: format!(
2179 "{flag_name} expects only the terms after '~', not a full 'response ~ terms' formula; use {flag_name} 's(x)' instead of {flag_name} 'y ~ s(x)' (or pass '1' for an intercept-only noise model)"
2180 ),
2181 });
2182 }
2183 let parsed_formula = parse_formula(&format!("{response} ~ {rhs}"))?;
2184 Ok((rhs.to_string(), parsed_formula))
2185}
2186
2187pub fn validate_auxiliary_formula_controls(
2188 parsed_formula: &ParsedFormula,
2189 flag_name: &str,
2190) -> Result<(), String> {
2191 if parsed_formula.linkwiggle.is_some() {
2192 return Err(FormulaDslError::IncompatibleTerm {
2193 reason: format!(
2194 "linkwiggle(...) is only supported in the main formula, not {flag_name}"
2195 ),
2196 }
2197 .into());
2198 }
2199 if parsed_formula.timewiggle.is_some() {
2200 return Err(FormulaDslError::IncompatibleTerm {
2201 reason: format!(
2202 "timewiggle(...) is only supported in the main survival formula, not {flag_name}"
2203 ),
2204 }
2205 .into());
2206 }
2207 if parsed_formula.linkspec.is_some() {
2208 return Err(FormulaDslError::IncompatibleTerm {
2209 reason: format!("link(...) is only supported in the main formula, not {flag_name}"),
2210 }
2211 .into());
2212 }
2213 if parsed_formula.survivalspec.is_some() {
2214 return Err(FormulaDslError::IncompatibleTerm {
2215 reason: format!(
2216 "survmodel(...) is only supported in the main survival formula, not {flag_name}"
2217 ),
2218 }
2219 .into());
2220 }
2221 if !parsed_formula.logslope_surfaces.is_empty() && flag_name != "--logslope-formula" {
2222 return Err(FormulaDslError::IncompatibleTerm {
2223 reason: format!(
2224 "logslope(...) is only supported in --logslope-formula, not {flag_name}"
2225 ),
2226 }
2227 .into());
2228 }
2229 Ok::<(), _>(())
2230}
2231
2232pub fn parse_formula(formula: &str) -> Result<ParsedFormula, FormulaDslError> {
2233 let parsed_dsl =
2234 parse_formula_dsl(formula).map_err(|reason| FormulaDslError::ParseError { reason })?;
2235 let lhs = parsed_dsl.response_expr.trim();
2236 if lhs.is_empty() {
2237 return Err(FormulaDslError::ParseError {
2238 reason: "formula response (left-hand side) cannot be empty".to_string(),
2239 });
2240 }
2241 let mut terms = Vec::<ParsedTerm>::new();
2242 let mut linkwiggle: Option<LinkWiggleFormulaSpec> = None;
2243 let mut timewiggle: Option<LinkWiggleFormulaSpec> = None;
2244 let mut linkspec: Option<LinkFormulaSpec> = None;
2245 let mut survivalspec: Option<SurvivalFormulaSpec> = None;
2246 let mut logslope_surfaces = Vec::<LogSlopeSurfaceSpec>::new();
2247 let mut seen_term_keys: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
2252 let mut expanded_terms = Vec::<String>::new();
2253 for raw in parsed_dsl.rhs_terms {
2254 let trimmed = raw.trim();
2255 if trimmed.is_empty() {
2256 expanded_terms.push(String::new());
2257 continue;
2258 }
2259 let is_call = parse_function_call(trimmed).is_ok();
2264 let needs_expansion = !is_call
2265 && trimmed
2266 .chars()
2267 .scan(0i32, |depth, ch| {
2268 let d_before = *depth;
2269 match ch {
2270 '(' | '[' | '{' => *depth += 1,
2271 ')' | ']' | '}' if *depth > 0 => *depth -= 1,
2272 _ => {}
2273 }
2274 Some((d_before, ch))
2275 })
2276 .any(|(d, ch)| d == 0 && matches!(ch, ':' | '*' | '/' | '^'));
2277 if needs_expansion {
2278 for atoms in
2279 expand_wr_term(trimmed).map_err(|reason| FormulaDslError::ParseError { reason })?
2280 {
2281 if atoms.is_empty() {
2282 continue;
2283 }
2284 expanded_terms.push(atoms.join(":"));
2285 }
2286 } else {
2287 expanded_terms.push(trimmed.to_string());
2288 }
2289 }
2290
2291 for raw in expanded_terms {
2292 let t = raw.trim();
2293 if t.is_empty() || t == "1" {
2294 continue;
2295 }
2296 if t == "0" || t == "-1" {
2297 return Err(FormulaDslError::IncompatibleTerm {
2298 reason: "formula terms '0'/'-1' (intercept removal) are not supported yet"
2299 .to_string(),
2300 });
2301 }
2302 let key: String = {
2306 let mut acc = String::with_capacity(t.len());
2307 let mut in_single = false;
2308 let mut in_double = false;
2309 for ch in t.chars() {
2310 match ch {
2311 '\'' if !in_double => {
2312 in_single = !in_single;
2313 acc.push(ch);
2314 }
2315 '"' if !in_single => {
2316 in_double = !in_double;
2317 acc.push(ch);
2318 }
2319 c if c.is_whitespace() && !in_single && !in_double => {}
2320 _ => acc.push(ch),
2321 }
2322 }
2323 acc
2324 };
2325 if !seen_term_keys.insert(key.clone()) {
2326 return Err(FormulaDslError::IncompatibleTerm {
2327 reason: format!(
2328 "formula `{formula}` lists term `{t}` more than once. \
2329 Duplicate terms produce a rank-deficient design; \
2330 keep one copy or differentiate them (e.g. distinct k=, bs= options)."
2331 ),
2332 });
2333 }
2334 match parse_term(t)? {
2335 ParsedTerm::LinkWiggle { options } => {
2336 if linkwiggle.is_some() {
2337 return Err(FormulaDslError::IncompatibleTerm {
2338 reason: "formula can include at most one linkwiggle(...) term".to_string(),
2339 });
2340 }
2341 linkwiggle = Some(parse_linkwiggle_formulaspec(&options, t)?);
2342 }
2343 ParsedTerm::TimeWiggle { options } => {
2344 if timewiggle.is_some() {
2345 return Err(FormulaDslError::IncompatibleTerm {
2346 reason: "formula can include at most one timewiggle(...) term".to_string(),
2347 });
2348 }
2349 timewiggle = Some(parse_linkwiggle_formulaspec(&options, t)?);
2350 }
2351 ParsedTerm::LinkConfig { options } => {
2352 if linkspec.is_some() {
2353 return Err(FormulaDslError::IncompatibleTerm {
2354 reason: "formula can include at most one link(...) term".to_string(),
2355 });
2356 }
2357 linkspec = Some(parse_link_formulaspec(&options, t)?);
2358 }
2359 ParsedTerm::SurvivalConfig { options } => {
2360 if survivalspec.is_some() {
2361 return Err(FormulaDslError::IncompatibleTerm {
2362 reason: "formula can include at most one survmodel(...) term".to_string(),
2363 });
2364 }
2365 survivalspec = Some(parse_survival_formulaspec(&options, t)?);
2366 }
2367 ParsedTerm::LogSlopeSurface { z_column, terms } => {
2368 logslope_surfaces.push(LogSlopeSurfaceSpec { z_column, terms });
2369 }
2370 other => terms.push(other),
2371 }
2372 }
2373 if lhs.chars().all(|c| c.is_alphanumeric() || c == '_')
2379 && !lhs.is_empty()
2380 && parsed_terms_reference_column(&terms, lhs)
2381 {
2382 return Err(FormulaDslError::IncompatibleTerm {
2383 reason: format!(
2384 "formula `{formula}` uses response column `{lhs}` as its own predictor. \
2385 This fits y as a function of itself and is almost certainly a typo. \
2386 Drop the term that mentions `{lhs}` from the right-hand side."
2387 ),
2388 });
2389 }
2390 Ok(ParsedFormula {
2391 response: lhs.to_string(),
2392 terms,
2393 logslope_surfaces,
2394 linkwiggle,
2395 timewiggle,
2396 linkspec,
2397 survivalspec,
2398 })
2399}
2400
2401pub fn parse_term(raw: &str) -> Result<ParsedTerm, String> {
2402 fn split_call_args(call: &FunctionCallSpec) -> (Vec<String>, BTreeMap<String, String>) {
2403 let mut vars = Vec::<String>::new();
2404 let mut options = BTreeMap::<String, String>::new();
2405 for arg in &call.args {
2406 match arg {
2407 CallArgSpec::Positional(v) => vars.push(v.trim().to_string()),
2408 CallArgSpec::Named { key, value } => {
2409 options.insert(key.to_ascii_lowercase(), strip_quotes(value).to_string());
2410 }
2411 }
2412 }
2413 (vars, options)
2414 }
2415
2416 if raw.contains(':')
2420 && !raw.contains('(')
2421 && raw.split(':').all(|piece| is_exact_ident(piece.trim()))
2422 {
2423 let vars: Vec<String> = raw
2424 .split(':')
2425 .map(|piece| piece.trim().to_string())
2426 .collect();
2427 if vars.len() >= 2 {
2428 let mut sorted = vars.clone();
2429 sorted.sort();
2430 sorted.dedup();
2431 if sorted.len() != vars.len() {
2432 return Err(FormulaDslError::IncompatibleTerm {
2433 reason: format!(
2434 "interaction term `{raw}` references the same variable more than once"
2435 ),
2436 }
2437 .into());
2438 }
2439 return Ok(ParsedTerm::Interaction { vars: sorted });
2440 }
2441 }
2442
2443 let call = parse_function_call(raw).ok();
2444 if let Some(call) = call {
2445 let name = call.name.to_ascii_lowercase();
2446 let (vars, mut options) = split_call_args(&call);
2447 match name.as_str() {
2448 "constrain" | "constraint" | "box" => {
2449 if vars.len() != 1 {
2450 return Err(FormulaDslError::InvalidArgument {
2451 reason: format!(
2452 "constrain()/constraint()/box() expects exactly one variable: {raw}"
2453 ),
2454 }
2455 .into());
2456 }
2457 validate_known_term_options(
2458 "constrain",
2459 &options,
2460 &["min", "lower", "max", "upper"],
2461 raw,
2462 )?;
2463 let (coefficient_min, coefficient_max) =
2464 parse_linear_constraint_bounds(&options, raw)?;
2465 if coefficient_min.is_none() && coefficient_max.is_none() {
2466 return Err(FormulaDslError::MalformedConfig {
2467 reason: format!(
2468 "constrain()/constraint()/box() requires at least one of min/lower/max/upper: {raw}"
2469 ),
2470 }
2471 .into());
2472 }
2473 return Ok(ParsedTerm::Linear {
2474 name: vars[0].clone(),
2475 explicit: true,
2476 coefficient_min,
2477 coefficient_max,
2478 });
2479 }
2480 "nonnegative" | "nonnegative_coef" => {
2481 if vars.len() != 1 {
2482 return Err(FormulaDslError::InvalidArgument {
2483 reason: format!("nonnegative() expects exactly one variable: {raw}"),
2484 }
2485 .into());
2486 }
2487 validate_known_term_options("nonnegative", &options, &[], raw)?;
2488 return Ok(ParsedTerm::Linear {
2489 name: vars[0].clone(),
2490 explicit: true,
2491 coefficient_min: Some(0.0),
2492 coefficient_max: None,
2493 });
2494 }
2495 "nonpositive" | "nonpositive_coef" => {
2496 if vars.len() != 1 {
2497 return Err(FormulaDslError::InvalidArgument {
2498 reason: format!("nonpositive() expects exactly one variable: {raw}"),
2499 }
2500 .into());
2501 }
2502 validate_known_term_options("nonpositive", &options, &[], raw)?;
2503 return Ok(ParsedTerm::Linear {
2504 name: vars[0].clone(),
2505 explicit: true,
2506 coefficient_min: None,
2507 coefficient_max: Some(0.0),
2508 });
2509 }
2510 "bounded" => {
2511 if vars.len() != 1 {
2512 return Err(FormulaDslError::InvalidArgument {
2513 reason: format!("bounded() expects exactly one variable: {raw}"),
2514 }
2515 .into());
2516 }
2517 validate_known_term_options(
2518 "bounded",
2519 &options,
2520 &["min", "max", "prior", "pull", "target", "strength"],
2521 raw,
2522 )?;
2523 let min = parse_required_f64_option(&options, "min", raw)?;
2524 let max = parse_required_f64_option(&options, "max", raw)?;
2525 if !min.is_finite() || !max.is_finite() || min >= max {
2526 return Err(FormulaDslError::InvalidArgument {
2527 reason: format!(
2528 "bounded() requires finite min < max, got min={min}, max={max}: {raw}"
2529 ),
2530 }
2531 .into());
2532 }
2533 let prior = parse_bounded_priorspec(&options, min, max, raw)?;
2534 return Ok(ParsedTerm::BoundedLinear {
2535 name: vars[0].clone(),
2536 min,
2537 max,
2538 prior,
2539 });
2540 }
2541 "group" | "re" | "factor" => {
2542 if vars.len() != 1 {
2543 return Err(FormulaDslError::InvalidArgument {
2544 reason: format!(
2545 "{name}() expects exactly one variable, got '{}': {raw}",
2546 vars.join(",")
2547 ),
2548 }
2549 .into());
2550 }
2551 return Ok(ParsedTerm::RandomEffect {
2552 name: vars[0].clone(),
2553 });
2554 }
2555 "tensor" | "interaction" | "te" => {
2556 if vars.len() < 2 {
2557 return Err(FormulaDslError::InvalidArgument {
2558 reason: format!(
2559 "tensor()/interaction()/te() requires at least two variables: {raw}"
2560 ),
2561 }
2562 .into());
2563 }
2564 return Ok(ParsedTerm::Smooth {
2565 label: raw.to_string(),
2566 vars,
2567 kind: SmoothKind::Te,
2568 options,
2569 });
2570 }
2571 "t2" => {
2572 if vars.len() < 2 {
2573 return Err(FormulaDslError::InvalidArgument {
2574 reason: format!("t2() requires at least two variables: {raw}"),
2575 }
2576 .into());
2577 }
2578 return Ok(ParsedTerm::Smooth {
2579 label: raw.to_string(),
2580 vars,
2581 kind: SmoothKind::T2,
2582 options,
2583 });
2584 }
2585 "ti" => {
2586 if vars.len() < 2 {
2593 return Err(FormulaDslError::InvalidArgument {
2594 reason: format!("ti() requires at least two variables: {raw}"),
2595 }
2596 .into());
2597 }
2598 return Ok(ParsedTerm::Smooth {
2599 label: raw.to_string(),
2600 vars,
2601 kind: SmoothKind::Ti,
2602 options,
2603 });
2604 }
2605 "fs" | "sz" => {
2606 if vars.len() != 2 {
2607 return Err(format!("{}() expects exactly two variables: {raw}", name));
2608 }
2609 options.insert("bs".to_string(), name.clone());
2610 return Ok(ParsedTerm::Smooth {
2611 label: raw.to_string(),
2612 vars,
2613 kind: SmoothKind::S,
2614 options,
2615 });
2616 }
2617 "thinplate" | "thin_plate" | "tps" => {
2618 if vars.len() < 2 {
2619 return Err(FormulaDslError::InvalidArgument {
2620 reason: format!(
2621 "thinplate()/thin_plate()/tps() requires at least two variables: {raw}"
2622 ),
2623 }
2624 .into());
2625 }
2626 options.insert("type".to_string(), "tps".to_string());
2627 return Ok(ParsedTerm::Smooth {
2628 label: raw.to_string(),
2629 vars,
2630 kind: SmoothKind::S,
2631 options,
2632 });
2633 }
2634 "smooth" | "s" | "cyclic" | "periodic" | "cc" | "cp" => {
2635 if vars.is_empty() {
2636 return Err(FormulaDslError::InvalidArgument {
2637 reason: format!("smooth()/s() requires at least one variable: {raw}"),
2638 }
2639 .into());
2640 }
2641 let bs_is_re = options
2647 .get("bs")
2648 .or_else(|| options.get("type"))
2649 .map(|v| {
2650 v.trim()
2651 .trim_matches(|c| c == '\'' || c == '"')
2652 .to_ascii_lowercase()
2653 })
2654 .as_deref()
2655 == Some("re");
2656 if bs_is_re && vars.len() == 1 {
2657 return Ok(ParsedTerm::RandomEffect {
2658 name: vars[0].clone(),
2659 });
2660 }
2661 if matches!(name.as_str(), "cyclic" | "periodic" | "cc" | "cp") {
2662 options.insert("type".to_string(), "cyclic".to_string());
2663 }
2664 if matches!(name.as_str(), "fs" | "sz") {
2665 options.insert("bs".to_string(), name.clone());
2666 }
2667 return Ok(ParsedTerm::Smooth {
2668 label: raw.to_string(),
2669 vars,
2670 kind: SmoothKind::S,
2671 options,
2672 });
2673 }
2674 "sphere" | "sos" | "spherical" | "s2" => {
2675 if vars.len() != 2 {
2682 return Err(FormulaDslError::InvalidArgument {
2683 reason: format!(
2684 "{name}() expects exactly two variables: latitude and longitude; got {} in {raw}",
2685 vars.len()
2686 ),
2687 }
2688 .into());
2689 }
2690 options.insert("type".to_string(), "sphere".to_string());
2691 return Ok(ParsedTerm::Smooth {
2692 label: raw.to_string(),
2693 vars,
2694 kind: SmoothKind::S,
2695 options,
2696 });
2697 }
2698 "mjs" | "measurejet" | "measure_jet" | "web" => {
2699 if vars.is_empty() {
2705 return Err(FormulaDslError::InvalidArgument {
2706 reason: format!("{name}() requires at least one variable: {raw}"),
2707 }
2708 .into());
2709 }
2710 options.insert("type".to_string(), "measurejet".to_string());
2711 return Ok(ParsedTerm::Smooth {
2712 label: raw.to_string(),
2713 vars,
2714 kind: SmoothKind::S,
2715 options,
2716 });
2717 }
2718 "curv" | "curvature" | "constant_curvature" | "mkappa" => {
2719 if vars.is_empty() {
2725 return Err(FormulaDslError::InvalidArgument {
2726 reason: format!("{name}() requires at least one variable: {raw}"),
2727 }
2728 .into());
2729 }
2730 options.insert("type".to_string(), "curvature".to_string());
2731 return Ok(ParsedTerm::Smooth {
2732 label: raw.to_string(),
2733 vars,
2734 kind: SmoothKind::S,
2735 options,
2736 });
2737 }
2738 "matern" => {
2739 if vars.is_empty() {
2740 return Err(FormulaDslError::InvalidArgument {
2741 reason: format!("matern() requires at least one variable: {raw}"),
2742 }
2743 .into());
2744 }
2745 options.insert("type".to_string(), "matern".to_string());
2746 return Ok(ParsedTerm::Smooth {
2747 label: raw.to_string(),
2748 vars,
2749 kind: SmoothKind::S,
2750 options,
2751 });
2752 }
2753 "duchon" => {
2754 if vars.is_empty() {
2755 return Err(FormulaDslError::InvalidArgument {
2756 reason: format!("duchon() requires at least one variable: {raw}"),
2757 }
2758 .into());
2759 }
2760 if option_bool(&options, "cyclic").unwrap_or(false)
2761 || option_bool(&options, "periodic").unwrap_or(false)
2762 {
2763 options.insert("cyclic".to_string(), "true".to_string());
2764 }
2765 options.insert("type".to_string(), "duchon".to_string());
2766 return Ok(ParsedTerm::Smooth {
2767 label: raw.to_string(),
2768 vars,
2769 kind: SmoothKind::S,
2770 options,
2771 });
2772 }
2773 "pca" => {
2774 if vars.is_empty() {
2775 return Err(FormulaDslError::InvalidArgument {
2776 reason: format!("pca() requires at least one variable: {raw}"),
2777 }
2778 .into());
2779 }
2780 options.insert("type".to_string(), "pca".to_string());
2781 return Ok(ParsedTerm::Smooth {
2782 label: raw.to_string(),
2783 vars,
2784 kind: SmoothKind::S,
2785 options,
2786 });
2787 }
2788 "linkwiggle" => {
2789 if !vars.is_empty() {
2790 return Err(FormulaDslError::InvalidArgument {
2791 reason: format!(
2792 "linkwiggle() takes named options only; positional args are not supported: {raw}"
2793 ),
2794 }
2795 .into());
2796 }
2797 return Ok(ParsedTerm::LinkWiggle { options });
2798 }
2799 "timewiggle" => {
2800 if !vars.is_empty() {
2801 return Err(FormulaDslError::InvalidArgument {
2802 reason: format!(
2803 "timewiggle() takes named options only; positional args are not supported: {raw}"
2804 ),
2805 }
2806 .into());
2807 }
2808 return Ok(ParsedTerm::TimeWiggle { options });
2809 }
2810 "link" => {
2811 if !vars.is_empty() {
2812 return Err(FormulaDslError::InvalidArgument {
2813 reason: format!(
2814 "link() takes named options only; positional args are not supported: {raw}"
2815 ),
2816 }
2817 .into());
2818 }
2819 return Ok(ParsedTerm::LinkConfig { options });
2820 }
2821 "survmodel" => {
2822 if !vars.is_empty() {
2823 return Err(FormulaDslError::InvalidArgument {
2824 reason: format!(
2825 "survmodel() takes named options only; positional args are not supported: {raw}"
2826 ),
2827 }
2828 .into());
2829 }
2830 return Ok(ParsedTerm::SurvivalConfig { options });
2831 }
2832 "logslope" | "log_slope" | "log_slope_surface" => {
2833 validate_known_term_options("logslope", &options, &[], raw)?;
2834 if vars.len() < 2 {
2835 return Err(FormulaDslError::InvalidArgument {
2836 reason: format!(
2837 "logslope() expects a z column followed by one or more RHS terms; add one logslope(z, ...) declaration per vector-z coordinate: {raw}"
2838 ),
2839 }
2840 .into());
2841 }
2842 let z_column = vars[0].trim();
2843 if !is_exact_ident(z_column) {
2844 return Err(FormulaDslError::InvalidArgument {
2845 reason: format!(
2846 "logslope() z column must be a bare column name, got `{z_column}` in {raw}"
2847 ),
2848 }
2849 .into());
2850 }
2851 let rhs = vars[1..].join(" + ");
2852 let parsed = parse_formula(&format!("__logslope__ ~ {rhs}"))?;
2853 if !parsed.logslope_surfaces.is_empty() {
2854 return Err(FormulaDslError::IncompatibleTerm {
2855 reason: format!(
2856 "logslope() declarations cannot be nested inside another logslope(): {raw}"
2857 ),
2858 }
2859 .into());
2860 }
2861 validate_auxiliary_formula_controls(&parsed, "logslope()")?;
2862 return Ok(ParsedTerm::LogSlopeSurface {
2863 z_column: z_column.to_string(),
2864 terms: parsed.terms,
2865 });
2866 }
2867 "linear" => {
2868 if vars.len() != 1 {
2869 return Err(FormulaDslError::InvalidArgument {
2870 reason: format!("linear() expects exactly one variable: {raw}"),
2871 }
2872 .into());
2873 }
2874 validate_known_term_options(
2875 "linear",
2876 &options,
2877 &["min", "lower", "max", "upper"],
2878 raw,
2879 )?;
2880 let (coefficient_min, coefficient_max) =
2881 parse_linear_constraint_bounds(&options, raw)?;
2882 return Ok(ParsedTerm::Linear {
2883 name: vars[0].clone(),
2884 explicit: true,
2885 coefficient_min,
2886 coefficient_max,
2887 });
2888 }
2889 _ => {
2890 return Err(format!(
2891 "unknown term function `{name}` in '{raw}'. Supported: bounded(), linear(), constrain()/constraint()/box(), nonnegative(), nonpositive(), smooth()/s(), cyclic()/periodic()/cc()/cp(), thinplate()/thin_plate()/tps(), tensor()/interaction()/te(), t2(), ti(), fs(), sz(), group()/re()/factor(), sphere()/sos()/spherical(), s2(), matern(), duchon(), pca(), logslope()/log_slope(), linkwiggle(), timewiggle(), link(), survmodel()"
2892 ));
2893 }
2894 }
2895 }
2896
2897 let ident = raw.trim();
2898 if !is_exact_ident(ident) {
2899 return Err(FormulaDslError::UnknownIdentifier {
2900 reason: format!("unsupported top-level RHS term: {raw}"),
2901 }
2902 .into());
2903 }
2904
2905 Ok(ParsedTerm::Linear {
2906 name: ident.to_string(),
2907 explicit: false,
2908 coefficient_min: None,
2909 coefficient_max: None,
2910 })
2911}
2912
2913pub fn parse_link_choice(
2918 raw: Option<&str>,
2919 flexible_flag: bool,
2920) -> Result<Option<LinkChoice>, FormulaDslError> {
2921 if raw.is_none() && !flexible_flag {
2922 return Ok(None);
2923 }
2924 let Some(v) = raw else {
2925 return Ok(Some(LinkChoice {
2926 mode: LinkMode::Flexible,
2927 link: LinkFunction::Probit,
2928 mixture_components: None,
2929 }));
2930 };
2931 let t = v.trim().to_ascii_lowercase();
2932 if let Some(inner) = t
2933 .strip_prefix("flexible(")
2934 .and_then(|s| s.strip_suffix(')'))
2935 {
2936 if let Some(components_inner) = inner
2937 .strip_prefix("blended(")
2938 .and_then(|s| s.strip_suffix(')'))
2939 .or_else(|| {
2940 inner
2941 .strip_prefix("mixture(")
2942 .and_then(|s| s.strip_suffix(')'))
2943 })
2944 {
2945 parse_link_component_list(components_inner)?;
2946 return Err(FormulaDslError::IncompatibleTerm {
2947 reason:
2948 "flexible(...) does not support blended(...)/mixture(...) links; wiggle is only supported for jointly fit standard links"
2949 .to_string(),
2950 });
2951 }
2952 let link = parse_linkname(inner)?;
2953 if !linkname_supports_joint_wiggle(link) {
2954 return Err(FormulaDslError::IncompatibleTerm {
2955 reason:
2956 "flexible(...) does not support sas/beta-logistic links; wiggle is only supported for jointly fit standard links"
2957 .to_string(),
2958 });
2959 }
2960 return Ok(Some(LinkChoice {
2961 mode: LinkMode::Flexible,
2962 link,
2963 mixture_components: None,
2964 }));
2965 }
2966 if let Some(inner) = t
2967 .strip_prefix("blended(")
2968 .and_then(|s| s.strip_suffix(')'))
2969 .or_else(|| t.strip_prefix("mixture(").and_then(|s| s.strip_suffix(')')))
2970 {
2971 if flexible_flag {
2972 return Err(FormulaDslError::IncompatibleTerm {
2973 reason:
2974 "--flexible-link cannot be combined with --link blended(...)/mixture(...); blended inverse links are not flexible-link mode"
2975 .to_string(),
2976 });
2977 }
2978 let components = parse_link_component_list(inner)?;
2979 return Ok(Some(LinkChoice {
2980 mode: LinkMode::Strict,
2981 link: LinkFunction::Logit,
2982 mixture_components: Some(components),
2983 }));
2984 }
2985
2986 let link = parse_linkname(&t)?;
2987 if flexible_flag && !linkname_supports_joint_wiggle(link) {
2988 return Err(FormulaDslError::IncompatibleTerm {
2989 reason:
2990 "--flexible-link does not support sas/beta-logistic links; wiggle is only supported for jointly fit standard links"
2991 .to_string(),
2992 });
2993 }
2994 Ok(Some(LinkChoice {
2995 mode: if flexible_flag {
2996 LinkMode::Flexible
2997 } else {
2998 LinkMode::Strict
2999 },
3000 link,
3001 mixture_components: None,
3002 }))
3003}
3004
3005pub fn parse_linkname(v: &str) -> Result<LinkFunction, FormulaDslError> {
3006 match v.trim() {
3007 "identity" => Ok(LinkFunction::Identity),
3008 "log" => Ok(LinkFunction::Log),
3009 "logit" | "binomial-logit" => Ok(LinkFunction::Logit),
3010 "probit" | "binomial-probit" => Ok(LinkFunction::Probit),
3011 "cloglog" | "binomial-cloglog" => Ok(LinkFunction::CLogLog),
3012 "sas" => Ok(LinkFunction::Sas),
3013 "beta-logistic" => Ok(LinkFunction::BetaLogistic),
3014 other => Err(FormulaDslError::UnknownIdentifier {
3015 reason: format!(
3016 "unsupported link type '{other}'; \
3017 use one of identity|log|logit|probit|cloglog|binomial-logit|binomial-probit|binomial-cloglog|sas|beta-logistic|blended(...)/mixture(...) or flexible(...). \
3018 Both `--link <type>` (CLI flag) and `link(type=<type>)` (formula term) accept the same set."
3019 ),
3020 }),
3021 }
3022}
3023
3024pub fn parse_link_component(v: &str) -> Result<LinkComponent, String> {
3025 match v.trim() {
3026 "logit" => Ok(LinkComponent::Logit),
3027 "probit" => Ok(LinkComponent::Probit),
3028 "cloglog" => Ok(LinkComponent::CLogLog),
3029 "loglog" => Ok(LinkComponent::LogLog),
3030 "cauchit" => Ok(LinkComponent::Cauchit),
3031 other => Err(FormulaDslError::UnknownIdentifier {
3032 reason: format!(
3033 "unsupported blended-link component '{other}'; use probit|logit|cloglog|loglog|cauchit"
3034 ),
3035 }
3036 .into()),
3037 }
3038}
3039
3040pub fn parse_link_component_list(v: &str) -> Result<Vec<LinkComponent>, String> {
3041 let mut out = Vec::new();
3042 for part in v.split(',') {
3043 let trimmed = part.trim();
3044 if trimmed.is_empty() {
3045 continue;
3046 }
3047 let comp = parse_link_component(trimmed)?;
3048 if out.contains(&comp) {
3049 return Err(FormulaDslError::IncompatibleTerm {
3050 reason: "blended(...) cannot contain duplicate components".to_string(),
3051 }
3052 .into());
3053 }
3054 out.push(comp);
3055 }
3056 if out.len() < 2 {
3057 return Err(FormulaDslError::InvalidArgument {
3058 reason: "blended(...) requires at least two components".to_string(),
3059 }
3060 .into());
3061 }
3062 Ok(out)
3063}