Skip to main content

ratex_parser/
macro_expander.rs

1use std::collections::HashMap;
2
3use ratex_lexer::token::{SourceLocation, Token};
4use ratex_lexer::Lexer;
5
6use crate::error::{ParseError, ParseResult};
7use crate::functions::FUNCTIONS;
8use crate::parse_node::Mode;
9
/// Commands that act like macros but aren't defined as a macro, function, or symbol.
///
/// The list holds the two script characters (`^`, `_`) and the `\limits` /
/// `\nolimits` control sequences. Used in `is_defined`.
pub static IMPLICIT_COMMANDS: &[&str] = &["^", "_", "\\limits", "\\nolimits"];
13
/// Handler type for function-based macros (e.g. \TextOrMath, \@ifstar).
///
/// Takes the MacroExpander mutably (so it can pop tokens or consume arguments
/// itself) and returns the tokens to push onto the stack. The built-in handlers
/// return those tokens in stack order, i.e. reversed relative to reading order.
pub type FnMacroHandler = fn(&mut MacroExpander) -> ParseResult<Vec<Token>>;
17
/// A macro definition: string template, token list, or function.
///
/// `Clone` is derived because the macro namespace copies definitions into its
/// per-group undo records.
#[derive(Clone)]
pub enum MacroDefinition {
    /// Simple string expansion (e.g., `\def\foo{bar}` → "bar").
    ///
    /// The body is stored untokenized; parameter markers such as `#1` appear
    /// literally in the string (see the built-in table).
    Text(String),
    /// Pre-tokenized expansion with argument count
    Tokens {
        // NOTE(review): presumably stored in stack (reversed) order like other
        // token lists in this file — confirm against the expansion code.
        tokens: Vec<Token>,
        num_args: usize,
    },
    /// Function-based macro (consumes tokens directly, returns expansion)
    Function(FnMacroHandler),
}
31
32impl std::fmt::Debug for MacroDefinition {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            Self::Text(s) => write!(f, "Text({:?})", s),
36            Self::Tokens { tokens, num_args } => {
37                write!(f, "Tokens {{ {} tokens, {} args }}", tokens.len(), num_args)
38            }
39            Self::Function(_) => write!(f, "Function(...)"),
40        }
41    }
42}
43
/// Result of expanding a macro once.
struct MacroExpansion {
    // Replacement tokens.
    // NOTE(review): presumably in stack (reversed) order like handler output — confirm.
    tokens: Vec<Token>,
    // Argument count associated with the expanded definition.
    num_args: usize,
    // NOTE(review): looks like a flag for definitions that must not be expanded
    // any further — confirm against the code that builds this struct.
    unexpandable: bool,
}
50
/// The MacroExpander (or "gullet") manages macro expansion.
///
/// It sits between the Lexer (mouth) and the Parser (stomach).
/// Tokens are read from the lexer, pushed onto an internal stack,
/// and macros are expanded until only non-expandable tokens remain.
///
/// Modeled after KaTeX's MacroExpander.ts.
pub struct MacroExpander<'a> {
    /// Token source for the input string.
    pub lexer: Lexer<'a>,
    /// Current mode; mode-aware macros such as `\TextOrMath` branch on it.
    pub mode: Mode,
    // Pending tokens; macro handlers return their expansions in this stack's order.
    stack: Vec<Token>,
    // Scoped table of macro definitions (built-ins are loaded by `new`).
    macros: MacroNamespace,
    // Starts at 0 in `new`.
    // NOTE(review): presumably counts expansions performed so far — confirm.
    expansion_count: usize,
    // Set to 1000 in `new`.
    // NOTE(review): presumably the ceiling for `expansion_count` — confirm.
    max_expand: usize,
}
66
/// Scoped macro namespace supporting group nesting.
struct MacroNamespace {
    // Definitions visible right now, keyed by macro name.
    current: HashMap<String, MacroDefinition>,
    // One undo map per open group (innermost last). Each entry maps a name to
    // the definition it had before the group first changed it; `None` means the
    // name was undefined and must be removed again when the group ends.
    group_stack: Vec<HashMap<String, Option<MacroDefinition>>>,
}
72
73impl MacroNamespace {
74    fn new() -> Self {
75        Self {
76            current: HashMap::new(),
77            group_stack: Vec::new(),
78        }
79    }
80
81    fn get(&self, name: &str) -> Option<&MacroDefinition> {
82        self.current.get(name)
83    }
84
85    fn set(&mut self, name: String, def: MacroDefinition) {
86        if let Some(undo) = self.group_stack.last_mut() {
87            undo.entry(name.clone()).or_insert_with(|| self.current.get(&name).cloned());
88        }
89        self.current.insert(name, def);
90    }
91
92    fn set_global(&mut self, name: String, def: MacroDefinition) {
93        self.current.insert(name, def);
94    }
95
96    fn has(&self, name: &str) -> bool {
97        self.current.contains_key(name)
98    }
99
100    fn begin_group(&mut self) {
101        self.group_stack.push(HashMap::new());
102    }
103
104    fn end_group(&mut self) {
105        if let Some(undo) = self.group_stack.pop() {
106            for (name, old_val) in undo {
107                match old_val {
108                    Some(def) => { self.current.insert(name, def); }
109                    None => { self.current.remove(&name); }
110                }
111            }
112        }
113    }
114
115    fn end_groups(&mut self) {
116        while !self.group_stack.is_empty() {
117            self.end_group();
118        }
119    }
120}
121
122/// Tokenize a macro expansion string into stack order (same as [`MacroDefinition::Text`] bodies).
123fn lex_string_to_stack_tokens(text: &str) -> Vec<Token> {
124    let mut body_lexer = Lexer::new(text);
125    let mut tokens = Vec::new();
126    loop {
127        let tok = body_lexer.lex();
128        if tok.is_eof() {
129            break;
130        }
131        tokens.push(tok);
132    }
133    tokens.reverse();
134    tokens
135}
136
137impl<'a> MacroExpander<'a> {
138    pub fn new(input: &'a str, mode: Mode) -> Self {
139        let mut me = Self {
140            lexer: Lexer::new(input),
141            mode,
142            stack: Vec::new(),
143            macros: MacroNamespace::new(),
144            expansion_count: 0,
145            max_expand: 1000,
146        };
147        me.load_builtins();
148        me
149    }
150
151    fn load_builtins(&mut self) {
152        let builtins: &[(&str, &str)] = &[
153            // ── Grouping ──
154            ("\\bgroup", "{"),
155            ("\\egroup", "}"),
156
157            // ── Symbols from latex.ltx ──
158            ("\\lq", "`"),
159            ("\\rq", "'"),
160            // \lbrack and \rbrack are in the symbol table directly
161            ("\\aa", "\\r a"),
162            ("\\AA", "\\r A"),
163
164            // ── Active characters ──
165            ("~", "\\nobreakspace"),
166
167            // ── Phantoms ──
168            ("\\hphantom", "\\smash{\\phantom{#1}}"),
169
170            // ── Negated symbols ──
171            ("\\not", "\\html@mathml{\\mathrel{\\mathrlap\\@not}\\nobreak}{\\char\"338}"),
172            ("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}"),
173            ("\\ne", "\\neq"),
174            ("\u{2260}", "\\neq"),
175            ("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}{\\mathrel{\\char`∉}}"),
176            ("\u{2209}", "\\notin"),
177            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
178            ("\u{220C}", "\\notni"),
179            // \le and \ge are in the symbol table directly, not macros
180
181            // ── amsmath iff/implies ──
182            ("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"),
183            ("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"),
184            ("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"),
185
186            // ── Italic Greek capitals ──
187            ("\\varGamma", "\\mathit{\\Gamma}"),
188            ("\\varDelta", "\\mathit{\\Delta}"),
189            ("\\varTheta", "\\mathit{\\Theta}"),
190            ("\\varLambda", "\\mathit{\\Lambda}"),
191            ("\\varXi", "\\mathit{\\Xi}"),
192            ("\\varPi", "\\mathit{\\Pi}"),
193            ("\\varSigma", "\\mathit{\\Sigma}"),
194            ("\\varUpsilon", "\\mathit{\\Upsilon}"),
195            ("\\varPhi", "\\mathit{\\Phi}"),
196            ("\\varPsi", "\\mathit{\\Psi}"),
197            ("\\varOmega", "\\mathit{\\Omega}"),
198
199            // ── Spacing (mode-aware via \TextOrMath) ──
200            ("\\,", "\\TextOrMath{\\kern{.1667em}}{\\mskip{3mu}}"),
201            ("\\thinspace", "\\,"),
202            ("\\>", "\\mskip{4mu}"),
203            ("\\:", "\\TextOrMath{\\kern{.2222em}}{\\mskip{4mu}}"),
204            ("\\medspace", "\\:"),
205            ("\\;", "\\TextOrMath{\\kern{.2777em}}{\\mskip{5mu}}"),
206            ("\\thickspace", "\\;"),
207            ("\\!", "\\TextOrMath{\\kern{-.1667em}}{\\mskip{-3mu}}"),
208            ("\\negthinspace", "\\!"),
209            ("\\negmedspace", "\\TextOrMath{\\kern{-.2222em}}{\\mskip{-4mu}}"),
210            ("\\negthickspace", "\\TextOrMath{\\kern{-.2777em}}{\\mskip{-5mu}}"),
211            ("\\enspace", "\\kern.5em "),
212            ("\\enskip", "\\hskip.5em\\relax"),
213            ("\\quad", "\\hskip1em\\relax"),
214            ("\\qquad", "\\hskip2em\\relax"),
215
216            // ── Newline ──
217            ("\\newline", "\\\\\\relax"),
218
219            // ── hspace ──
220            ("\\@hspace", "\\hskip #1\\relax"),
221            ("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"),
222
223            // ── llap / rlap / clap ──
224            ("\\llap", "\\mathllap{\\textrm{#1}}"),
225            ("\\rlap", "\\mathrlap{\\textrm{#1}}"),
226            ("\\clap", "\\mathclap{\\textrm{#1}}"),
227
228            // ── Logos ──
229            ("\\TeX", "\\textrm{\\html@mathml{T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX}{TeX}}"),
230            ("\\LaTeX", "\\textrm{\\html@mathml{L\\kern-.36em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{LaTeX}}"),
231            ("\\KaTeX", "\\textrm{\\html@mathml{K\\kern-.17em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{KaTeX}}"),
232
233            // ── imath / jmath ──
234            ("\\imath", "\\html@mathml{\\@imath}{\u{0131}}"),
235            ("\\jmath", "\\html@mathml{\\@jmath}{\u{0237}}"),
236
237            // ── minuso ──
238            ("\\minuso", "\\mathbin{\\html@mathml{{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}{\\char`\u{29B5}}}"),
239            ("\\clap", "\\mathclap{\\textrm{#1}}"),
240
241            // ── mathstrut / underbar ──
242            ("\\mathstrut", "\\vphantom{(}"),
243            ("\\underbar", "\\underline{\\text{#1}}"),
244
245            // ── Bbbk ──
246            ("\\Bbbk", "\\Bbb{k}"),
247
248            // ── substack ──
249            ("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"),
250
251            // ── boxed ──
252            ("\\boxed", "\\fbox{$\\displaystyle{#1}$}"),
253
254            // ── colon ──
255            ("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"),
256
257            // ── dots (string-based) ──
258            ("\\dots", "\\cdots"),
259            ("\\cdots", "\\@cdots"),
260            ("\\dotsb", "\\cdots"),
261            ("\\dotsm", "\\cdots"),
262            ("\\dotsi", "\\!\\cdots"),
263            ("\\dotsx", "\\ldots\\,"),
264            ("\\dotsc", "\\ldots"),  // comma list: x,\dotsc,y
265            ("\\dotso", "\\ldots"),  // other
266            ("\\DOTSI", "\\relax"),
267            ("\\DOTSB", "\\relax"),
268            ("\\DOTSX", "\\relax"),
269
270            // ── negated relations / corners (→ symbol table \@xxx) ──
271            ("\\gvertneqq", "\\@gvertneqq"),
272            ("\\lvertneqq", "\\@lvertneqq"),
273            ("\\ngeqq", "\\@ngeqq"),
274            ("\\ngeqslant", "\\@ngeqslant"),
275            ("\\nleqq", "\\@nleqq"),
276            ("\\nleqslant", "\\@nleqslant"),
277            ("\\nshortmid", "\\@nshortmid"),
278            ("\\nshortparallel", "\\@nshortparallel"),
279            ("\\nsubseteqq", "\\@nsubseteqq"),
280            ("\\nsupseteqq", "\\@nsupseteqq"),
281            ("\\ulcorner", "\\@ulcorner"),
282            ("\\urcorner", "\\@urcorner"),
283            ("\\llcorner", "\\@llcorner"),
284            ("\\lrcorner", "\\@lrcorner"),
285            ("\\varsubsetneq", "\\@varsubsetneq"),
286            ("\\varsubsetneqq", "\\@varsubsetneqq"),
287            ("\\varsupsetneq", "\\@varsupsetneq"),
288            ("\\varsupsetneqq", "\\@varsupsetneqq"),
289
290            // ── delimiters / text (compose from existing) ──
291            // Match KaTeX `macros.ts` html@mathml first branch (STIX-style white tortoise brackets).
292            ("\\lBrace", "\\mathopen{\\{\\mkern-3.2mu[}"),
293            ("\\rBrace", "\\mathclose{]\\mkern-3.2mu\\}}"),
294            ("\\llbracket", "\\mathopen{[\\mkern-3.2mu[}"),
295            ("\\rrbracket", "\\mathclose{]\\mkern-3.2mu]}"),
296            ("\\copyright", "\\textcircled{c}"),
297            ("\\textregistered", "\\textcircled{\\scriptsize R}"),
298
299            // ── dddot / ddddot ──
300            ("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"),
301            ("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"),
302
303            // ── vdots ──
304            ("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}"),
305            ("\u{22ee}", "\\vdots"),
306
307            // ── bmod / pod / pmod / mod ──
308            ("\\bmod", "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}\\mathbin{\\rm mod}\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}"),
309            ("\\pod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)"),
310            ("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}"),
311            ("\\mod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}{\\rm mod}\\,\\,#1"),
312
313            // ── limsup / liminf / etc ──
314            ("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"),
315            ("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"),
316            ("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"),
317            ("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"),
318            ("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{\\mathrm{lim}}}"),
319            ("\\varliminf", "\\DOTSB\\operatorname*{\\underline{\\mathrm{lim}}}"),
320            ("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{\\mathrm{lim}}}"),
321            ("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{\\mathrm{lim}}}"),
322
323            // ── statmath ──
324            ("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"),
325            ("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"),
326            ("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"),
327
328            // ── mathtools colon variants ──
329            ("\\ordinarycolon", ":"),
330            ("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"),
331            ("\\dblcolon", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}{\\mathop{\\char\"2237}}"),
332            ("\\coloneqq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2254}}"),
333            ("\\Coloneqq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2237\\char\"3d}}"),
334            ("\\coloneq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"3a\\char\"2212}}"),
335            ("\\Coloneq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"2237\\char\"2212}}"),
336            ("\\eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2255}}"),
337            ("\\Eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"3d\\char\"2237}}"),
338            ("\\eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2239}}"),
339            ("\\Eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"2212\\char\"2237}}"),
340            ("\\colonapprox", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"3a\\char\"2248}}"),
341            ("\\Colonapprox", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"2237\\char\"2248}}"),
342            ("\\colonsim", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"3a\\char\"223c}}"),
343            ("\\Colonsim", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"2237\\char\"223c}}"),
344
345            // ── colonequals alternate names ──
346            ("\\ratio", "\\vcentcolon"),
347            ("\\coloncolon", "\\dblcolon"),
348            ("\\colonequals", "\\coloneqq"),
349            ("\\coloncolonequals", "\\Coloneqq"),
350            ("\\equalscolon", "\\eqqcolon"),
351            ("\\equalscoloncolon", "\\Eqqcolon"),
352            ("\\colonminus", "\\coloneq"),
353            ("\\coloncolonminus", "\\Coloneq"),
354            ("\\minuscolon", "\\eqcolon"),
355            ("\\minuscoloncolon", "\\Eqcolon"),
356            ("\\coloncolonapprox", "\\Colonapprox"),
357            ("\\coloncolonsim", "\\Colonsim"),
358            ("\\simcolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
359            ("\\simcoloncolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
360            ("\\approxcolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
361            ("\\approxcoloncolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
362
363            // ── braket (string-based) ──
364            ("\\bra", "\\mathinner{\\langle{#1}|}"),
365            ("\\ket", "\\mathinner{|{#1}\\rangle}"),
366            ("\\braket", "\\mathinner{\\langle{#1}\\rangle}"),
367            ("\\Braket", "\\bra@ket{\\left\\langle}{\\,\\middle\\vert\\,}{\\,\\middle\\vert\\,}{\\right\\rangle}"),
368            ("\\Bra", "\\left\\langle#1\\right|"),
369            ("\\Ket", "\\left|#1\\right\\rangle"),
370
371            // ── texvc (MediaWiki) ──
372            ("\\darr", "\\downarrow"),
373            ("\\dArr", "\\Downarrow"),
374            ("\\Darr", "\\Downarrow"),
375            ("\\lang", "\\langle"),
376            ("\\rang", "\\rangle"),
377            ("\\uarr", "\\uparrow"),
378            ("\\uArr", "\\Uparrow"),
379            ("\\Uarr", "\\Uparrow"),
380            ("\\N", "\\mathbb{N}"),
381            ("\\R", "\\mathbb{R}"),
382            ("\\Z", "\\mathbb{Z}"),
383            ("\\alef", "\\aleph"),
384            ("\\alefsym", "\\aleph"),
385            ("\\Alpha", "\\mathrm{A}"),
386            ("\\Beta", "\\mathrm{B}"),
387            ("\\bull", "\\bullet"),
388            ("\\Chi", "\\mathrm{X}"),
389            ("\\clubs", "\\clubsuit"),
390            ("\\cnums", "\\mathbb{C}"),
391            ("\\Complex", "\\mathbb{C}"),
392            ("\\Dagger", "\\ddagger"),
393            ("\\diamonds", "\\diamondsuit"),
394            ("\\empty", "\\emptyset"),
395            ("\\Epsilon", "\\mathrm{E}"),
396            ("\\Eta", "\\mathrm{H}"),
397            ("\\exist", "\\exists"),
398            ("\\harr", "\\leftrightarrow"),
399            ("\\hArr", "\\Leftrightarrow"),
400            ("\\Harr", "\\Leftrightarrow"),
401            ("\\hearts", "\\heartsuit"),
402            ("\\image", "\\Im"),
403            ("\\infin", "\\infty"),
404            ("\\Iota", "\\mathrm{I}"),
405            ("\\isin", "\\in"),
406            ("\\Kappa", "\\mathrm{K}"),
407            ("\\larr", "\\leftarrow"),
408            ("\\lArr", "\\Leftarrow"),
409            ("\\Larr", "\\Leftarrow"),
410            ("\\lrarr", "\\leftrightarrow"),
411            ("\\lrArr", "\\Leftrightarrow"),
412            ("\\Lrarr", "\\Leftrightarrow"),
413            ("\\Mu", "\\mathrm{M}"),
414            ("\\natnums", "\\mathbb{N}"),
415            ("\\Nu", "\\mathrm{N}"),
416            ("\\Omicron", "\\mathrm{O}"),
417            ("\\plusmn", "\\pm"),
418            ("\\rarr", "\\rightarrow"),
419            ("\\rArr", "\\Rightarrow"),
420            ("\\Rarr", "\\Rightarrow"),
421            ("\\real", "\\Re"),
422            ("\\reals", "\\mathbb{R}"),
423            ("\\Reals", "\\mathbb{R}"),
424            ("\\Rho", "\\mathrm{P}"),
425            ("\\sdot", "\\cdot"),
426            ("\\sect", "\\S"),
427            ("\\spades", "\\spadesuit"),
428            ("\\sub", "\\subset"),
429            ("\\sube", "\\subseteq"),
430            ("\\supe", "\\supseteq"),
431            ("\\Tau", "\\mathrm{T}"),
432            ("\\thetasym", "\\vartheta"),
433            ("\\weierp", "\\wp"),
434            ("\\Zeta", "\\mathrm{Z}"),
435
436            // ── Khan Academy color aliases ──
437            ("\\blue", "\\textcolor{##6495ed}{#1}"),
438            ("\\orange", "\\textcolor{##ffa500}{#1}"),
439            ("\\pink", "\\textcolor{##ff00af}{#1}"),
440            ("\\red", "\\textcolor{##df0030}{#1}"),
441            ("\\green", "\\textcolor{##28ae7b}{#1}"),
442            ("\\gray", "\\textcolor{gray}{#1}"),
443            ("\\purple", "\\textcolor{##9d38bd}{#1}"),
444
445            // ── Unicode script letters ──
446            ("\u{212C}", "\\mathscr{B}"),
447            ("\u{2130}", "\\mathscr{E}"),
448            ("\u{2131}", "\\mathscr{F}"),
449            ("\u{210B}", "\\mathscr{H}"),
450            ("\u{2110}", "\\mathscr{I}"),
451            ("\u{2112}", "\\mathscr{L}"),
452            ("\u{2133}", "\\mathscr{M}"),
453            ("\u{211B}", "\\mathscr{R}"),
454            ("\u{212D}", "\\mathfrak{C}"),
455            ("\u{210C}", "\\mathfrak{H}"),
456            ("\u{2128}", "\\mathfrak{Z}"),
457
458            // ── notni ──
459            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
460
461            // ── actuarialangle ──
462            ("\\angln", "{\\angl n}"),
463
464            // ── set/Set (braket notation, simplified) ──
465            ("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"),
466            ("\\Set", "\\bra@set{\\left\\{\\:}{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"),
467
468            // ── equation numbering (display math; no-op in parser) ──
469            ("\\nonumber", "\\relax"),
470            ("\\notag", "\\relax"),
471
472            // ── KaTeX mhchem (\\tripledash for \\bond ~ forms) ──
473            (
474                "\\tripledash",
475                "{\\vphantom{-}\\raisebox{2.56mu}{$\\mkern2mu\\tiny\\text{-}\\mkern1mu\\text{-}\\mkern1mu\\text{-}\\mkern2mu$}}",
476            ),
477        ];
478
479        for &(name, expansion) in builtins {
480            self.macros.set(
481                name.to_string(),
482                MacroDefinition::Text(expansion.to_string()),
483            );
484        }
485
486        self.load_function_macros();
487    }
488
489    fn load_function_macros(&mut self) {
490        // \noexpand: mark the next token as non-expandable (only if expandable)
491        self.macros.set(
492            "\\noexpand".to_string(),
493            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
494                let mut tok = me.pop_token();
495                if me.is_expandable(&tok.text) {
496                    tok.noexpand = true;
497                    tok.treat_as_relax = true;
498                }
499                Ok(vec![tok])
500            }),
501        );
502
503        // \@firstoftwo{A}{B} → A
504        // NOTE: consume_args returns tokens in stack order (reversed).
505        // We return them as-is since expand_once does stack.extend(tokens).
506        self.macros.set(
507            "\\@firstoftwo".to_string(),
508            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
509                let args = me.consume_args(2)?;
510                Ok(args.into_iter().next().unwrap())
511            }),
512        );
513
514        // \@secondoftwo{A}{B} → B
515        self.macros.set(
516            "\\@secondoftwo".to_string(),
517            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
518                let args = me.consume_args(2)?;
519                Ok(args.into_iter().nth(1).unwrap())
520            }),
521        );
522
523        // \@ifnextchar{C}{T}{F}: peek; if next non-space == C then T else F
524        self.macros.set(
525            "\\@ifnextchar".to_string(),
526            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
527                let args = me.consume_args(3)?;
528                me.consume_spaces();
529                let next = me.future().text.clone();
530                let char_arg = &args[0];
531                // char_arg is reversed; the "first" char in original order is the last element
532                let char_text = char_arg.first().map_or("", |t| t.text.as_str());
533                if next == char_text {
534                    Ok(args[1].clone())
535                } else {
536                    Ok(args[2].clone())
537                }
538            }),
539        );
540
541        // \@ifstar{with-star}{without-star}: if next is * → consume * and use first arg
542        self.macros.set(
543            "\\@ifstar".to_string(),
544            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
545                let args = me.consume_args(2)?;
546                let next = me.future().text.clone();
547                if next == "*" {
548                    me.pop_token();
549                    Ok(args[0].clone())
550                } else {
551                    Ok(args[1].clone())
552                }
553            }),
554        );
555
556        // \TextOrMath{text-branch}{math-branch}: choose based on mode
557        self.macros.set(
558            "\\TextOrMath".to_string(),
559            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
560                let args = me.consume_args(2)?;
561                if me.mode == Mode::Text {
562                    Ok(args[0].clone())
563                } else {
564                    Ok(args[1].clone())
565                }
566            }),
567        );
568
569        // \html@mathml is registered as a function in htmlmathml.rs
570
571        // \newcommand{\name}[nargs]{body}
572        self.macros.set(
573            "\\newcommand".to_string(),
574            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
575                handle_newcommand(me, false, true)
576            }),
577        );
578
579        // \renewcommand{\name}[nargs]{body}
580        self.macros.set(
581            "\\renewcommand".to_string(),
582            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
583                handle_newcommand(me, true, false)
584            }),
585        );
586
587        // \providecommand{\name}[nargs]{body}
588        self.macros.set(
589            "\\providecommand".to_string(),
590            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
591                handle_newcommand(me, true, true)
592            }),
593        );
594
595        // \char: parse decimal/octal/hex/backtick number → \@char{N}
596        self.macros.set(
597            "\\char".to_string(),
598            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
599                let mut tok = me.pop_token();
600                let mut number: i64;
601                let base: Option<u32>;
602
603                if tok.text == "'" {
604                    base = Some(8);
605                    tok = me.pop_token();
606                } else if tok.text == "\"" {
607                    base = Some(16);
608                    tok = me.pop_token();
609                } else if tok.text == "`" {
610                    tok = me.pop_token();
611                    if tok.text.starts_with('\\') {
612                        number = tok.text.chars().nth(1).map_or(0, |c| c as i64);
613                    } else {
614                        number = tok.text.chars().next().map_or(0, |c| c as i64);
615                    }
616                    // Build \@char{N} tokens in reverse (stack order)
617                    let s = number.to_string();
618                    let loc = tok.loc.clone();
619                    let mut result = vec![Token::new("}", loc.start, loc.end)];
620                    for ch in s.chars().rev() {
621                        result.push(Token::new(ch.to_string(), loc.start, loc.end));
622                    }
623                    result.push(Token::new("{", loc.start, loc.end));
624                    result.push(Token::new("\\@char", loc.start, loc.end));
625                    return Ok(result);
626                } else {
627                    base = Some(10);
628                }
629
630                if let Some(b) = base {
631                    number = i64::from_str_radix(&tok.text, b).unwrap_or(0);
632                    loop {
633                        let next = me.future().text.clone();
634                        if let Ok(d) = i64::from_str_radix(&next, b) {
635                            me.pop_token();
636                            number = number * (b as i64) + d;
637                        } else {
638                            break;
639                        }
640                    }
641                } else {
642                    number = 0;
643                }
644
645                let s = number.to_string();
646                let loc = tok.loc.clone();
647                let mut result = vec![Token::new("}", loc.start, loc.end)];
648                for ch in s.chars().rev() {
649                    result.push(Token::new(ch.to_string(), loc.start, loc.end));
650                }
651                result.push(Token::new("{", loc.start, loc.end));
652                result.push(Token::new("\\@char", loc.start, loc.end));
653                Ok(result)
654            }),
655        );
656
657        // \operatorname: \@ifstar\operatornamewithlimits\operatorname@
658        self.macros.set(
659            "\\operatorname".to_string(),
660            MacroDefinition::Text(
661                "\\@ifstar\\operatornamewithlimits\\operatorname@".to_string(),
662            ),
663        );
664
665        // \message{...}: consume argument and discard (no-op)
666        self.macros.set(
667            "\\message".to_string(),
668            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
669                let _args = me.consume_args(1)?;
670                Ok(vec![])
671            }),
672        );
673
674        // \errmessage{...}: consume argument and discard (no-op)
675        self.macros.set(
676            "\\errmessage".to_string(),
677            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
678                let _args = me.consume_args(1)?;
679                Ok(vec![])
680            }),
681        );
682
683        // KaTeX HTML extensions: no-op (only render content, no HTML attributes).
684        // Not standard LaTeX; for compatibility we parse and expand to second argument only.
685        for name in &["\\htmlClass", "\\htmlData", "\\htmlId", "\\htmlStyle"] {
686            let name = (*name).to_string();
687            self.macros.set(
688                name.clone(),
689                MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
690                    let args = me.consume_args(2)?;
691                    let content = args[1].iter().cloned().rev().collect::<Vec<_>>();
692                    Ok(content)
693                }),
694            );
695        }
696
697        // \bra@ket: like \bra@set but replaces ALL | at depth 0 (for \Braket)
698        self.macros.set(
699            "\\bra@ket".to_string(),
700            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
701                let args = me.consume_args(4)?;
702                let left = args[0].clone();
703                let middle = args[1].clone();
704                let middle_double = args[2].clone();
705                let right = args[3].clone();
706
707                let content = me.consume_args(1)?;
708                let content = content.into_iter().next().unwrap();
709
710                // Convert stack-order (reversed) to logical order, replace all | at depth 0,
711                // then reverse back to stack order.
712                let logical: Vec<Token> = content.into_iter().rev().collect();
713                let mut new_logical: Vec<Token> = Vec::new();
714                let mut depth: i32 = 0;
715                let mut i = 0;
716                while i < logical.len() {
717                    let t = &logical[i];
718                    if t.text == "{" {
719                        depth += 1;
720                        new_logical.push(t.clone());
721                    } else if t.text == "}" {
722                        depth -= 1;
723                        new_logical.push(t.clone());
724                    } else if depth == 0 && t.text == "|" {
725                        // Check for || (double pipe) → middleDouble
726                        if !middle_double.is_empty()
727                            && i + 1 < logical.len()
728                            && logical[i + 1].text == "|"
729                        {
730                            // middle_double is in stack/reversed order; reverse to logical order
731                            new_logical.extend(middle_double.iter().rev().cloned());
732                            i += 2;
733                            continue;
734                        }
735                        // middle is in stack/reversed order; reverse to logical order
736                        new_logical.extend(middle.iter().rev().cloned());
737                    } else {
738                        new_logical.push(t.clone());
739                    }
740                    i += 1;
741                }
742
743                // Reverse back to stack order
744                let content_rev: Vec<Token> = new_logical.into_iter().rev().collect();
745
746                // Build: right + content + left (reversed for stack)
747                let mut to_expand = Vec::new();
748                to_expand.extend(right);
749                to_expand.extend(content_rev);
750                to_expand.extend(left);
751
752                me.begin_group();
753                let expanded = me.expand_tokens(to_expand)?;
754                me.end_group();
755
756                Ok(expanded)
757            }),
758        );
759
760        // \bra@set: braket set notation helper
761        // Only replaces the FIRST | with middle tokens (one-shot), matching KaTeX
762        self.macros.set(
763            "\\bra@set".to_string(),
764            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
765                let args = me.consume_args(4)?;
766                let left = args[0].clone();
767                let middle = args[1].clone();
768                let middle_double = args[2].clone();
769                let right = args[3].clone();
770
771                let content = me.consume_args(1)?;
772                let mut content = content.into_iter().next().unwrap();
773
774                // Scan content and replace only the first | at depth 0
775                // Content tokens are in reversed order (stack), so iterate from end
776                let mut depth: i32 = 0;
777                let mut _first_pipe_idx: Option<usize> = None;
778                // Tokens are reversed (last token first in vec), scan in logical order
779                for i in (0..content.len()).rev() {
780                    let t = &content[i];
781                    if t.text == "{" { depth += 1; }
782                    else if t.text == "}" { depth -= 1; }
783                    else if depth == 0 && t.text == "|" {
784                        // Check for || (double pipe) → middleDouble
785                        if !middle_double.is_empty() && i > 0 && content[i - 1].text == "|" {
786                            _first_pipe_idx = Some(i);
787                            // Replace || with middleDouble
788                            content.remove(i);
789                            content.remove(i - 1);
790                            let insert_at = if i >= 2 { i - 1 } else { 0 };
791                            for (j, tok) in middle_double.iter().enumerate() {
792                                content.insert(insert_at + j, tok.clone());
793                            }
794                            break;
795                        }
796                        _first_pipe_idx = Some(i);
797                        content.remove(i);
798                        for (j, tok) in middle.iter().enumerate() {
799                            content.insert(i + j, tok.clone());
800                        }
801                        break;
802                    }
803                }
804
805                // Build: right + content + left (reversed for stack)
806                let mut to_expand = Vec::new();
807                to_expand.extend(right);
808                to_expand.extend(content);
809                to_expand.extend(left);
810
811                me.begin_group();
812                let expanded = me.expand_tokens(to_expand)?;
813                me.end_group();
814
815                Ok(expanded)
816            }),
817        );
818
819        // \\ce / \\pu: KaTeX mhchem 3.3.0 (Rust port in `crate::mhchem`)
820        self.macros.set(
821            "\\ce".to_string(),
822            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
823                let args = me.consume_args(1)?;
824                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
825                let tex = crate::mhchem::chem_parse_str(&s, "ce")
826                    .map_err(|e| ParseError::msg(format!("\\ce: {e}")))?;
827                Ok(lex_string_to_stack_tokens(&tex))
828            }),
829        );
830        self.macros.set(
831            "\\pu".to_string(),
832            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
833                let args = me.consume_args(1)?;
834                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
835                let tex = crate::mhchem::chem_parse_str(&s, "pu")
836                    .map_err(|e| ParseError::msg(format!("\\pu: {e}")))?;
837                Ok(lex_string_to_stack_tokens(&tex))
838            }),
839        );
840    }
841
842    pub fn set_macro(&mut self, name: String, def: MacroDefinition) {
843        self.macros.set(name, def);
844    }
845
846    pub fn set_macro_global(&mut self, name: String, def: MacroDefinition) {
847        self.macros.set_global(name, def);
848    }
849
850    pub fn set_text_macro(&mut self, name: &str, text: &str) {
851        self.macros.set(
852            name.to_string(),
853            MacroDefinition::Text(text.to_string()),
854        );
855    }
856
857    pub fn get_macro(&self, name: &str) -> Option<&MacroDefinition> {
858        self.macros.get(name)
859    }
860
861    /// Expand a list of tokens fully (for \edef/\xdef).
862    pub fn expand_tokens(&mut self, tokens: Vec<Token>) -> ParseResult<Vec<Token>> {
863        let saved_stack = std::mem::take(&mut self.stack);
864        self.stack = tokens;
865
866        let mut result = Vec::new();
867        loop {
868            if self.stack.is_empty() {
869                break;
870            }
871            let expanded = self.expand_once(false)?;
872            if !expanded {
873                if let Some(tok) = self.stack.pop() {
874                    if tok.is_eof() {
875                        break;
876                    }
877                    result.push(tok);
878                }
879            }
880        }
881
882        self.stack = saved_stack;
883        result.reverse();
884        Ok(result)
885    }
886
887    pub fn switch_mode(&mut self, new_mode: Mode) {
888        self.mode = new_mode;
889    }
890
891    pub fn begin_group(&mut self) {
892        self.macros.begin_group();
893    }
894
895    pub fn end_group(&mut self) {
896        self.macros.end_group();
897    }
898
899    pub fn end_groups(&mut self) {
900        self.macros.end_groups();
901    }
902
903    /// Returns the topmost token on the stack, without expanding it.
904    pub fn future(&mut self) -> &Token {
905        if self.stack.is_empty() {
906            let tok = self.lexer.lex();
907            self.stack.push(tok);
908        }
909        self.stack.last().unwrap()
910    }
911
912    /// Remove and return the next unexpanded token.
913    pub fn pop_token(&mut self) -> Token {
914        self.future();
915        self.stack.pop().unwrap()
916    }
917
918    /// Modify the top token's text on the stack (for \global prefix handling).
919    pub fn set_top_text(&mut self, text: String) {
920        self.future();
921        if let Some(tok) = self.stack.last_mut() {
922            tok.text = text;
923        }
924    }
925
926    /// Push a token onto the stack.
927    pub fn push_token(&mut self, token: Token) {
928        self.stack.push(token);
929    }
930
931    /// Push multiple tokens onto the stack.
932    pub fn push_tokens(&mut self, tokens: Vec<Token>) {
933        self.stack.extend(tokens);
934    }
935
936    /// Consume all following space tokens, without expansion.
937    pub fn consume_spaces(&mut self) {
938        loop {
939            let is_space = self.future().text == " ";
940            if is_space {
941                self.stack.pop();
942            } else {
943                break;
944            }
945        }
946    }
947
948    /// Expand the next token once if possible.
949    /// Returns Ok(true) if expanded, Ok(false) if not expandable.
950    fn expand_once(&mut self, expandable_only: bool) -> ParseResult<bool> {
951        let top_token = self.pop_token();
952        let name = &top_token.text;
953
954        if top_token.noexpand {
955            self.push_token(top_token);
956            return Ok(false);
957        }
958
959        // Check for function-based macro first — always expandable
960        if let Some(MacroDefinition::Function(handler)) = self.macros.get(name).cloned() {
961            self.count_expansion(1)?;
962            let tokens = handler(self)?;
963            self.stack.extend(tokens);
964            return Ok(true);
965        }
966
967        let expansion = self.get_expansion(name);
968        match expansion {
969            None => {
970                if expandable_only && name.starts_with('\\') && !self.is_defined(name) {
971                    return Err(ParseError::new(
972                        format!("Undefined control sequence: {}", name),
973                        Some(&top_token),
974                    ));
975                }
976                self.push_token(top_token);
977                Ok(false)
978            }
979            Some(exp) if expandable_only && exp.unexpandable => {
980                self.push_token(top_token);
981                Ok(false)
982            }
983            Some(exp) => {
984                self.count_expansion(1)?;
985                let mut tokens = exp.tokens;
986                if exp.num_args > 0 {
987                    let args = self.consume_args(exp.num_args)?;
988                    tokens = self.substitute_args(tokens, &args);
989                }
990                self.stack.extend(tokens);
991                Ok(true)
992            }
993        }
994    }
995
996    fn substitute_args(&self, mut tokens: Vec<Token>, args: &[Vec<Token>]) -> Vec<Token> {
997        let mut i = tokens.len();
998        while i > 0 {
999            i -= 1;
1000            if tokens[i].text == "#" && i > 0 {
1001                let next = &tokens[i - 1];
1002                if next.text == "#" {
1003                    tokens.remove(i);
1004                    i -= 1;
1005                } else if let Ok(n) = next.text.parse::<usize>() {
1006                    if n >= 1 && n <= args.len() {
1007                        tokens.remove(i);
1008                        tokens.remove(i - 1);
1009                        let arg_tokens = &args[n - 1];
1010                        for (j, t) in arg_tokens.iter().enumerate() {
1011                            tokens.insert(i - 1 + j, t.clone());
1012                        }
1013                        i = i.saturating_sub(1);
1014                    }
1015                }
1016            }
1017        }
1018        tokens
1019    }
1020
1021    fn get_expansion(&self, name: &str) -> Option<MacroExpansion> {
1022        let def = self.macros.get(name)?;
1023
1024        if name.len() == 1 {
1025            let ch = name.chars().next().unwrap();
1026            let catcode = self.lexer_catcode(ch);
1027            if catcode != 0 && catcode != 13 {
1028                return None;
1029            }
1030        }
1031
1032        match def {
1033            MacroDefinition::Text(text) => {
1034                let mut num_args = 0;
1035                let stripped = text.replace("##", "");
1036                while stripped.contains(&format!("#{}", num_args + 1)) {
1037                    num_args += 1;
1038                }
1039                let mut body_lexer = Lexer::new(text);
1040                let mut tokens = Vec::new();
1041                loop {
1042                    let tok = body_lexer.lex();
1043                    if tok.is_eof() {
1044                        break;
1045                    }
1046                    tokens.push(tok);
1047                }
1048                tokens.reverse();
1049                Some(MacroExpansion {
1050                    tokens,
1051                    num_args,
1052                    unexpandable: false,
1053                })
1054            }
1055            MacroDefinition::Tokens { tokens, num_args } => Some(MacroExpansion {
1056                tokens: tokens.clone(),
1057                num_args: *num_args,
1058                unexpandable: false,
1059            }),
1060            MacroDefinition::Function(_) => {
1061                // Signal that this is a function macro; handled in expand_once
1062                Some(MacroExpansion {
1063                    tokens: vec![],
1064                    num_args: 0,
1065                    unexpandable: false,
1066                })
1067            }
1068        }
1069    }
1070
1071    fn lexer_catcode(&self, ch: char) -> u8 {
1072        self.lexer.get_catcode(ch)
1073    }
1074
1075    fn count_expansion(&mut self, amount: usize) -> ParseResult<()> {
1076        self.expansion_count += amount;
1077        if self.expansion_count > self.max_expand {
1078            Err(ParseError::msg(
1079                "Too many expansions: infinite loop or need to increase maxExpand setting",
1080            ))
1081        } else {
1082            Ok(())
1083        }
1084    }
1085
1086    /// Recursively expand the next token until a non-expandable token is found.
1087    pub fn expand_next_token(&mut self) -> ParseResult<Token> {
1088        loop {
1089            let expanded = self.expand_once(false)?;
1090            if !expanded {
1091                let mut token = self.stack.pop().unwrap();
1092                if token.treat_as_relax {
1093                    token.text = "\\relax".to_string();
1094                }
1095                return Ok(token);
1096            }
1097        }
1098    }
1099
1100    /// Consume a single argument from the token stream.
1101    pub fn consume_arg(&mut self, delims: Option<&[&str]>) -> ParseResult<ConsumedArg> {
1102        let is_delimited = delims.is_some_and(|d| !d.is_empty());
1103        if !is_delimited {
1104            self.consume_spaces();
1105        }
1106
1107        let start = self.future().clone();
1108        let mut tokens = Vec::new();
1109        let mut depth: i32 = 0;
1110        let mut end_tok;
1111
1112        loop {
1113            let tok = self.pop_token();
1114            end_tok = tok.clone();
1115            tokens.push(tok.clone());
1116
1117            if tok.text == "{" {
1118                depth += 1;
1119            } else if tok.text == "}" {
1120                depth -= 1;
1121                if depth == -1 {
1122                    return Err(ParseError::new("Extra }", Some(&tok)));
1123                }
1124            } else if tok.is_eof() {
1125                return Err(ParseError::new(
1126                    "Unexpected end of input in a macro argument",
1127                    Some(&tok),
1128                ));
1129            }
1130
1131            if depth == 0 && !is_delimited {
1132                break;
1133            }
1134
1135            if let Some(delims) = delims {
1136                if is_delimited && depth == 0 {
1137                    if let Some(last) = delims.last() {
1138                        if tok.text == *last {
1139                            tokens.pop();
1140                            break;
1141                        }
1142                    }
1143                }
1144            }
1145        }
1146
1147        if start.text == "{" && tokens.last().is_some_and(|t| t.text == "}") {
1148            tokens.pop();
1149            tokens.remove(0);
1150        }
1151
1152        tokens.reverse();
1153
1154        Ok(ConsumedArg {
1155            tokens,
1156            start,
1157            end: end_tok,
1158        })
1159    }
1160
1161    /// Consume N arguments.
1162    fn consume_args(&mut self, num_args: usize) -> ParseResult<Vec<Vec<Token>>> {
1163        let mut args = Vec::with_capacity(num_args);
1164        for _ in 0..num_args {
1165            let arg = self.consume_arg(None)?;
1166            args.push(arg.tokens);
1167        }
1168        Ok(args)
1169    }
1170
1171    /// Scan a function argument (optional or mandatory).
1172    /// Pushes an EOF token to mark the end, then pushes the argument tokens.
1173    pub fn scan_argument(&mut self, is_optional: bool) -> ParseResult<Option<Token>> {
1174        if is_optional {
1175            self.consume_spaces();
1176            if self.future().text != "[" {
1177                return Ok(None);
1178            }
1179            let start = self.pop_token();
1180            let arg = self.consume_arg(Some(&["]"]))?;
1181            let end = &arg.end;
1182            let end_loc = end.loc.clone();
1183
1184            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1185            self.push_tokens(arg.tokens);
1186
1187            let result = Token {
1188                text: String::new(),
1189                loc: SourceLocation::range(&start.loc, &end_loc),
1190                noexpand: false,
1191                treat_as_relax: false,
1192            };
1193            Ok(Some(result))
1194        } else {
1195            let arg = self.consume_arg(None)?;
1196            let end_loc = arg.end.loc.clone();
1197
1198            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1199            self.push_tokens(arg.tokens);
1200
1201            let result = Token {
1202                text: String::new(),
1203                loc: SourceLocation::range(&arg.start.loc, &end_loc),
1204                noexpand: false,
1205                treat_as_relax: false,
1206            };
1207            Ok(Some(result))
1208        }
1209    }
1210
1211    /// Check if a command name is currently defined.
1212    pub fn is_defined(&self, name: &str) -> bool {
1213        self.macros.has(name)
1214            || FUNCTIONS.contains_key(name)
1215            || is_known_symbol(name)
1216            || IMPLICIT_COMMANDS.contains(&name)
1217    }
1218
1219    /// Check if a command is expandable.
1220    pub fn is_expandable(&self, name: &str) -> bool {
1221        if let Some(_def) = self.macros.get(name) {
1222            return true;
1223        }
1224        if let Some(func) = FUNCTIONS.get(name) {
1225            return !func.primitive;
1226        }
1227        false
1228    }
1229}
1230
1231pub struct ConsumedArg {
1232    pub tokens: Vec<Token>,
1233    pub start: Token,
1234    pub end: Token,
1235}
1236
1237fn handle_newcommand(
1238    me: &mut MacroExpander,
1239    exists_ok: bool,
1240    nonexists_ok: bool,
1241) -> ParseResult<Vec<Token>> {
1242    let name_arg = me.consume_arg(None)?;
1243    // name_arg.tokens is reversed (stack order); last element = first token in original
1244    let name = name_arg.tokens.last().map_or_else(String::new, |t| t.text.clone());
1245
1246    let exists = me.is_defined(&name);
1247    if exists && !exists_ok {
1248        return Err(ParseError::msg(format!(
1249            "\\newcommand{{{}}} attempting to redefine {}; use \\renewcommand",
1250            name, name
1251        )));
1252    }
1253    if !exists && !nonexists_ok {
1254        return Err(ParseError::msg(format!(
1255            "\\renewcommand{{{}}} when command {} does not yet exist; use \\newcommand",
1256            name, name
1257        )));
1258    }
1259
1260    me.consume_spaces();
1261    let mut num_args = 0usize;
1262    if me.future().text == "[" {
1263        me.pop_token();
1264        let narg_tok = me.pop_token();
1265        num_args = narg_tok.text.parse().unwrap_or(0);
1266        let close = me.pop_token();
1267        if close.text != "]" {
1268            return Err(ParseError::msg("Expected ] in \\newcommand"));
1269        }
1270    }
1271
1272    let body_arg = me.consume_arg(None)?;
1273    let tokens = body_arg.tokens;
1274
1275    me.set_macro(name, MacroDefinition::Tokens { tokens, num_args });
1276    Ok(vec![])
1277}
1278
1279fn is_known_symbol(name: &str) -> bool {
1280    use ratex_font::symbols;
1281    symbols::get_symbol(name, symbols::Mode::Math).is_some()
1282        || symbols::get_symbol(name, symbols::Mode::Text).is_some()
1283}