Skip to main content

ratex_parser/
macro_expander.rs

1use std::collections::HashMap;
2
3use ratex_lexer::token::{SourceLocation, Token};
4use ratex_lexer::Lexer;
5
6use crate::error::{ParseError, ParseResult};
7use crate::functions::FUNCTIONS;
8use crate::parse_node::Mode;
9
/// Command names that behave like macros even though none of them is
/// defined as a macro, a function, or a symbol.
///
/// Used in `is_defined`.
pub static IMPLICIT_COMMANDS: &[&str] = &[
    "^",
    "_",
    "\\limits",
    "\\nolimits",
];
13
14/// Handler type for function-based macros (e.g. \TextOrMath, \@ifstar).
15/// Takes the MacroExpander mutably and returns tokens to push onto the stack.
16pub type FnMacroHandler = fn(&mut MacroExpander) -> ParseResult<Vec<Token>>;
17
18/// A macro definition: string template, token list, or function.
19#[derive(Clone)]
20pub enum MacroDefinition {
21    /// Simple string expansion (e.g., `\def\foo{bar}` → "bar")
22    Text(String),
23    /// Pre-tokenized expansion with argument count
24    Tokens {
25        tokens: Vec<Token>,
26        num_args: usize,
27    },
28    /// Function-based macro (consumes tokens directly, returns expansion)
29    Function(FnMacroHandler),
30}
31
32impl std::fmt::Debug for MacroDefinition {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            Self::Text(s) => write!(f, "Text({:?})", s),
36            Self::Tokens { tokens, num_args } => {
37                write!(f, "Tokens {{ {} tokens, {} args }}", tokens.len(), num_args)
38            }
39            Self::Function(_) => write!(f, "Function(...)"),
40        }
41    }
42}
43
44/// Result of expanding a macro once.
45struct MacroExpansion {
46    tokens: Vec<Token>,
47    num_args: usize,
48    unexpandable: bool,
49}
50
51/// The MacroExpander (or "gullet") manages macro expansion.
52///
53/// It sits between the Lexer (mouth) and the Parser (stomach).
54/// Tokens are read from the lexer, pushed onto an internal stack,
55/// and macros are expanded until only non-expandable tokens remain.
56///
57/// Modeled after KaTeX's MacroExpander.ts.
58pub struct MacroExpander<'a> {
59    pub lexer: Lexer<'a>,
60    pub mode: Mode,
61    stack: Vec<Token>,
62    macros: MacroNamespace,
63    expansion_count: usize,
64    max_expand: usize,
65}
66
67/// Scoped macro namespace supporting group nesting.
68struct MacroNamespace {
69    current: HashMap<String, MacroDefinition>,
70    group_stack: Vec<HashMap<String, Option<MacroDefinition>>>,
71}
72
73impl MacroNamespace {
74    fn new() -> Self {
75        Self {
76            current: HashMap::new(),
77            group_stack: Vec::new(),
78        }
79    }
80
81    fn get(&self, name: &str) -> Option<&MacroDefinition> {
82        self.current.get(name)
83    }
84
85    fn set(&mut self, name: String, def: MacroDefinition) {
86        if let Some(undo) = self.group_stack.last_mut() {
87            undo.entry(name.clone()).or_insert_with(|| self.current.get(&name).cloned());
88        }
89        self.current.insert(name, def);
90    }
91
92    fn set_global(&mut self, name: String, def: MacroDefinition) {
93        self.current.insert(name, def);
94    }
95
96    fn has(&self, name: &str) -> bool {
97        self.current.contains_key(name)
98    }
99
100    fn begin_group(&mut self) {
101        self.group_stack.push(HashMap::new());
102    }
103
104    fn end_group(&mut self) {
105        if let Some(undo) = self.group_stack.pop() {
106            for (name, old_val) in undo {
107                match old_val {
108                    Some(def) => { self.current.insert(name, def); }
109                    None => { self.current.remove(&name); }
110                }
111            }
112        }
113    }
114
115    fn end_groups(&mut self) {
116        while !self.group_stack.is_empty() {
117            self.end_group();
118        }
119    }
120}
121
122/// Tokenize a macro expansion string into stack order (same as [`MacroDefinition::Text`] bodies).
123fn lex_string_to_stack_tokens(text: &str) -> Vec<Token> {
124    let mut body_lexer = Lexer::new(text);
125    let mut tokens = Vec::new();
126    loop {
127        let tok = body_lexer.lex();
128        if tok.is_eof() {
129            break;
130        }
131        tokens.push(tok);
132    }
133    tokens.reverse();
134    tokens
135}
136
137impl<'a> MacroExpander<'a> {
138    pub fn new(input: &'a str, mode: Mode) -> Self {
139        let mut me = Self {
140            lexer: Lexer::new(input),
141            mode,
142            stack: Vec::new(),
143            macros: MacroNamespace::new(),
144            expansion_count: 0,
145            max_expand: 1000,
146        };
147        me.load_builtins();
148        me
149    }
150
151    fn load_builtins(&mut self) {
152        let builtins: &[(&str, &str)] = &[
153            // ── Grouping ──
154            ("\\bgroup", "{"),
155            ("\\egroup", "}"),
156
157            // ── Symbols from latex.ltx ──
158            ("\\lq", "`"),
159            ("\\rq", "'"),
160            // \lbrack and \rbrack are in the symbol table directly
161            ("\\aa", "\\r a"),
162            ("\\AA", "\\r A"),
163
164            // ── Active characters ──
165            ("~", "\\nobreakspace"),
166
167            // ── Phantoms ──
168            ("\\hphantom", "\\smash{\\phantom{#1}}"),
169
170            // ── Negated symbols ──
171            ("\\not", "\\html@mathml{\\mathrel{\\mathrlap\\@not}\\nobreak}{\\char\"338}"),
172            ("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}"),
173            ("\\ne", "\\neq"),
174            ("\u{2260}", "\\neq"),
175            ("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}{\\mathrel{\\char`∉}}"),
176            ("\u{2209}", "\\notin"),
177            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
178            ("\u{220C}", "\\notni"),
179            // \le and \ge are in the symbol table directly, not macros
180
181            // ── amsmath iff/implies ──
182            ("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"),
183            ("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"),
184            ("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"),
185
186            // ── Italic Greek capitals ──
187            ("\\varGamma", "\\mathit{\\Gamma}"),
188            ("\\varDelta", "\\mathit{\\Delta}"),
189            ("\\varTheta", "\\mathit{\\Theta}"),
190            ("\\varLambda", "\\mathit{\\Lambda}"),
191            ("\\varXi", "\\mathit{\\Xi}"),
192            ("\\varPi", "\\mathit{\\Pi}"),
193            ("\\varSigma", "\\mathit{\\Sigma}"),
194            ("\\varUpsilon", "\\mathit{\\Upsilon}"),
195            ("\\varPhi", "\\mathit{\\Phi}"),
196            ("\\varPsi", "\\mathit{\\Psi}"),
197            ("\\varOmega", "\\mathit{\\Omega}"),
198
199            // ── Spacing (mode-aware via \TextOrMath) ──
200            ("\\,", "\\TextOrMath{\\kern{.1667em}}{\\mskip{3mu}}"),
201            ("\\thinspace", "\\,"),
202            ("\\>", "\\mskip{4mu}"),
203            ("\\:", "\\TextOrMath{\\kern{.2222em}}{\\mskip{4mu}}"),
204            ("\\medspace", "\\:"),
205            ("\\;", "\\TextOrMath{\\kern{.2777em}}{\\mskip{5mu}}"),
206            ("\\thickspace", "\\;"),
207            ("\\!", "\\TextOrMath{\\kern{-.1667em}}{\\mskip{-3mu}}"),
208            ("\\negthinspace", "\\!"),
209            ("\\negmedspace", "\\TextOrMath{\\kern{-.2222em}}{\\mskip{-4mu}}"),
210            ("\\negthickspace", "\\TextOrMath{\\kern{-.2777em}}{\\mskip{-5mu}}"),
211            ("\\enspace", "\\kern.5em "),
212            ("\\enskip", "\\hskip.5em\\relax"),
213            ("\\quad", "\\hskip1em\\relax"),
214            ("\\qquad", "\\hskip2em\\relax"),
215
216            // ── Newline ──
217            ("\\newline", "\\\\\\relax"),
218
219            // ── hspace ──
220            ("\\@hspace", "\\hskip #1\\relax"),
221            ("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"),
222
223            // ── llap / rlap / clap ──
224            ("\\llap", "\\mathllap{\\textrm{#1}}"),
225            ("\\rlap", "\\mathrlap{\\textrm{#1}}"),
226            ("\\clap", "\\mathclap{\\textrm{#1}}"),
227
228            // ── Logos ──
229            ("\\TeX", "\\textrm{\\html@mathml{T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX}{TeX}}"),
230            ("\\LaTeX", "\\textrm{\\html@mathml{L\\kern-.36em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{LaTeX}}"),
231            ("\\KaTeX", "\\textrm{\\html@mathml{K\\kern-.17em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{KaTeX}}"),
232
233            // ── imath / jmath ──
234            ("\\imath", "\\html@mathml{\\@imath}{\u{0131}}"),
235            ("\\jmath", "\\html@mathml{\\@jmath}{\u{0237}}"),
236
237            // ── minuso ──
238            ("\\minuso", "\\mathbin{\\html@mathml{{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}{\\char`\u{29B5}}}"),
239            ("\\clap", "\\mathclap{\\textrm{#1}}"),
240
241            // ── mathstrut / underbar ──
242            ("\\mathstrut", "\\vphantom{(}"),
243            ("\\underbar", "\\underline{\\text{#1}}"),
244
245            // ── Bbbk ──
246            ("\\Bbbk", "\\Bbb{k}"),
247
248            // ── substack ──
249            ("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"),
250
251            // ── boxed ──
252            ("\\boxed", "\\fbox{$\\displaystyle{#1}$}"),
253
254            // ── colon ──
255            ("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"),
256
257            // ── dots (string-based) ──
258            ("\\dots", "\\cdots"),
259            ("\\cdots", "\\@cdots"),
260            ("\\dotsb", "\\cdots"),
261            ("\\dotsm", "\\cdots"),
262            ("\\dotsi", "\\!\\cdots"),
263            ("\\dotsx", "\\ldots\\,"),
264            ("\\dotsc", "\\ldots"),  // comma list: x,\dotsc,y
265            ("\\dotso", "\\ldots"),  // other
266            ("\\DOTSI", "\\relax"),
267            ("\\DOTSB", "\\relax"),
268            ("\\DOTSX", "\\relax"),
269
270            // ── negated relations / corners (→ symbol table \@xxx) ──
271            ("\\gvertneqq", "\\@gvertneqq"),
272            ("\\lvertneqq", "\\@lvertneqq"),
273            ("\\ngeqq", "\\@ngeqq"),
274            ("\\ngeqslant", "\\@ngeqslant"),
275            ("\\nleqq", "\\@nleqq"),
276            ("\\nleqslant", "\\@nleqslant"),
277            ("\\nshortmid", "\\@nshortmid"),
278            ("\\nshortparallel", "\\@nshortparallel"),
279            ("\\nsubseteqq", "\\@nsubseteqq"),
280            ("\\nsupseteqq", "\\@nsupseteqq"),
281            ("\\ulcorner", "\\@ulcorner"),
282            ("\\urcorner", "\\@urcorner"),
283            ("\\llcorner", "\\@llcorner"),
284            ("\\lrcorner", "\\@lrcorner"),
285            ("\\varsubsetneq", "\\@varsubsetneq"),
286            ("\\varsubsetneqq", "\\@varsubsetneqq"),
287            ("\\varsupsetneq", "\\@varsupsetneq"),
288            ("\\varsupsetneqq", "\\@varsupsetneqq"),
289
290            // ── delimiters / text (compose from existing) ──
291            // Match KaTeX `macros.ts` html@mathml first branch (STIX-style white tortoise brackets).
292            ("\\lBrace", "\\mathopen{\\{\\mkern-3.2mu[}"),
293            ("\\rBrace", "\\mathclose{]\\mkern-3.2mu\\}}"),
294            ("\\llbracket", "\\mathopen{[\\mkern-3.2mu[}"),
295            ("\\rrbracket", "\\mathclose{]\\mkern-3.2mu]}"),
296            ("\\copyright", "\\textcircled{c}"),
297            ("\\textregistered", "\\textcircled{\\scriptsize R}"),
298
299            // ── dddot / ddddot ──
300            ("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"),
301            ("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"),
302
303            // ── vdots ──
304            ("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}"),
305            ("\u{22ee}", "\\vdots"),
306
307            // ── bmod / pod / pmod / mod ──
308            ("\\bmod", "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}\\mathbin{\\rm mod}\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}"),
309            ("\\pod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)"),
310            ("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}"),
311            ("\\mod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}{\\rm mod}\\,\\,#1"),
312
313            // ── limsup / liminf / etc ──
314            ("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"),
315            ("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"),
316            ("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"),
317            ("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"),
318            ("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{\\mathrm{lim}}}"),
319            ("\\varliminf", "\\DOTSB\\operatorname*{\\underline{\\mathrm{lim}}}"),
320            ("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{\\mathrm{lim}}}"),
321            ("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{\\mathrm{lim}}}"),
322
323            // ── statmath ──
324            ("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"),
325            ("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"),
326            ("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"),
327
328            // ── mathtools colon variants ──
329            ("\\ordinarycolon", ":"),
330            ("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"),
331            ("\\dblcolon", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}{\\mathop{\\char\"2237}}"),
332            ("\\coloneqq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2254}}"),
333            ("\\Coloneqq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2237\\char\"3d}}"),
334            ("\\coloneq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"3a\\char\"2212}}"),
335            ("\\Coloneq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"2237\\char\"2212}}"),
336            ("\\eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2255}}"),
337            ("\\Eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"3d\\char\"2237}}"),
338            ("\\eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2239}}"),
339            ("\\Eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"2212\\char\"2237}}"),
340            ("\\colonapprox", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"3a\\char\"2248}}"),
341            ("\\Colonapprox", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"2237\\char\"2248}}"),
342            ("\\colonsim", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"3a\\char\"223c}}"),
343            ("\\Colonsim", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"2237\\char\"223c}}"),
344
345            // ── colonequals alternate names ──
346            ("\\ratio", "\\vcentcolon"),
347            ("\\coloncolon", "\\dblcolon"),
348            ("\\colonequals", "\\coloneqq"),
349            ("\\coloncolonequals", "\\Coloneqq"),
350            ("\\equalscolon", "\\eqqcolon"),
351            ("\\equalscoloncolon", "\\Eqqcolon"),
352            ("\\colonminus", "\\coloneq"),
353            ("\\coloncolonminus", "\\Coloneq"),
354            ("\\minuscolon", "\\eqcolon"),
355            ("\\minuscoloncolon", "\\Eqcolon"),
356            ("\\coloncolonapprox", "\\Colonapprox"),
357            ("\\coloncolonsim", "\\Colonsim"),
358            ("\\simcolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
359            ("\\simcoloncolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
360            ("\\approxcolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
361            ("\\approxcoloncolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
362
363            // ── braket (string-based) ──
364            ("\\bra", "\\mathinner{\\langle{#1}|}"),
365            ("\\ket", "\\mathinner{|{#1}\\rangle}"),
366            ("\\braket", "\\mathinner{\\langle{#1}\\rangle}"),
367            ("\\Braket", "\\bra@ket{\\left\\langle}{\\,\\middle\\vert\\,}{\\,\\middle\\vert\\,}{\\right\\rangle}"),
368            ("\\Bra", "\\left\\langle#1\\right|"),
369            ("\\Ket", "\\left|#1\\right\\rangle"),
370
371            // ── texvc (MediaWiki) ──
372            ("\\darr", "\\downarrow"),
373            ("\\dArr", "\\Downarrow"),
374            ("\\Darr", "\\Downarrow"),
375            ("\\lang", "\\langle"),
376            ("\\rang", "\\rangle"),
377            ("\\uarr", "\\uparrow"),
378            ("\\uArr", "\\Uparrow"),
379            ("\\Uarr", "\\Uparrow"),
380            ("\\N", "\\mathbb{N}"),
381            ("\\R", "\\mathbb{R}"),
382            ("\\Z", "\\mathbb{Z}"),
383            ("\\alef", "\\aleph"),
384            ("\\alefsym", "\\aleph"),
385            ("\\Alpha", "\\mathrm{A}"),
386            ("\\Beta", "\\mathrm{B}"),
387            ("\\bull", "\\bullet"),
388            ("\\Chi", "\\mathrm{X}"),
389            ("\\clubs", "\\clubsuit"),
390            ("\\cnums", "\\mathbb{C}"),
391            ("\\Complex", "\\mathbb{C}"),
392            ("\\Dagger", "\\ddagger"),
393            ("\\diamonds", "\\diamondsuit"),
394            ("\\empty", "\\emptyset"),
395            ("\\Epsilon", "\\mathrm{E}"),
396            ("\\Eta", "\\mathrm{H}"),
397            ("\\exist", "\\exists"),
398            ("\\harr", "\\leftrightarrow"),
399            ("\\hArr", "\\Leftrightarrow"),
400            ("\\Harr", "\\Leftrightarrow"),
401            ("\\hearts", "\\heartsuit"),
402            ("\\image", "\\Im"),
403            ("\\infin", "\\infty"),
404            ("\\Iota", "\\mathrm{I}"),
405            ("\\isin", "\\in"),
406            ("\\Kappa", "\\mathrm{K}"),
407            ("\\larr", "\\leftarrow"),
408            ("\\lArr", "\\Leftarrow"),
409            ("\\Larr", "\\Leftarrow"),
410            ("\\lrarr", "\\leftrightarrow"),
411            ("\\lrArr", "\\Leftrightarrow"),
412            ("\\Lrarr", "\\Leftrightarrow"),
413            ("\\Mu", "\\mathrm{M}"),
414            ("\\natnums", "\\mathbb{N}"),
415            ("\\Nu", "\\mathrm{N}"),
416            ("\\Omicron", "\\mathrm{O}"),
417            ("\\plusmn", "\\pm"),
418            ("\\rarr", "\\rightarrow"),
419            ("\\rArr", "\\Rightarrow"),
420            ("\\Rarr", "\\Rightarrow"),
421            ("\\real", "\\Re"),
422            ("\\reals", "\\mathbb{R}"),
423            ("\\Reals", "\\mathbb{R}"),
424            ("\\Rho", "\\mathrm{P}"),
425            ("\\sdot", "\\cdot"),
426            ("\\sect", "\\S"),
427            ("\\spades", "\\spadesuit"),
428            ("\\sub", "\\subset"),
429            ("\\sube", "\\subseteq"),
430            ("\\supe", "\\supseteq"),
431            ("\\Tau", "\\mathrm{T}"),
432            ("\\thetasym", "\\vartheta"),
433            ("\\weierp", "\\wp"),
434            ("\\Zeta", "\\mathrm{Z}"),
435
436            // ── Khan Academy color aliases ──
437            ("\\blue", "\\textcolor{##6495ed}{#1}"),
438            ("\\orange", "\\textcolor{##ffa500}{#1}"),
439            ("\\pink", "\\textcolor{##ff00af}{#1}"),
440            ("\\red", "\\textcolor{##df0030}{#1}"),
441            ("\\green", "\\textcolor{##28ae7b}{#1}"),
442            ("\\gray", "\\textcolor{gray}{#1}"),
443            ("\\purple", "\\textcolor{##9d38bd}{#1}"),
444
445            // ── Unicode script letters ──
446            ("\u{212C}", "\\mathscr{B}"),
447            ("\u{2130}", "\\mathscr{E}"),
448            ("\u{2131}", "\\mathscr{F}"),
449            ("\u{210B}", "\\mathscr{H}"),
450            ("\u{2110}", "\\mathscr{I}"),
451            ("\u{2112}", "\\mathscr{L}"),
452            ("\u{2133}", "\\mathscr{M}"),
453            ("\u{211B}", "\\mathscr{R}"),
454            ("\u{212D}", "\\mathfrak{C}"),
455            ("\u{210C}", "\\mathfrak{H}"),
456            ("\u{2128}", "\\mathfrak{Z}"),
457
458            // ── notni ──
459            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
460
461            // ── actuarialangle ──
462            ("\\angln", "{\\angl n}"),
463
464            // ── set/Set (braket notation, simplified) ──
465            ("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"),
466            ("\\Set", "\\bra@set{\\left\\{\\:}{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"),
467
468            // ── KaTeX mhchem (\\tripledash for \\bond ~ forms) ──
469            (
470                "\\tripledash",
471                "{\\vphantom{-}\\raisebox{2.56mu}{$\\mkern2mu\\tiny\\text{-}\\mkern1mu\\text{-}\\mkern1mu\\text{-}\\mkern2mu$}}",
472            ),
473        ];
474
475        for &(name, expansion) in builtins {
476            self.macros.set(
477                name.to_string(),
478                MacroDefinition::Text(expansion.to_string()),
479            );
480        }
481
482        self.load_function_macros();
483    }
484
485    fn load_function_macros(&mut self) {
486        // \noexpand: mark the next token as non-expandable (only if expandable)
487        self.macros.set(
488            "\\noexpand".to_string(),
489            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
490                let mut tok = me.pop_token();
491                if me.is_expandable(&tok.text) {
492                    tok.noexpand = true;
493                    tok.treat_as_relax = true;
494                }
495                Ok(vec![tok])
496            }),
497        );
498
499        // \@firstoftwo{A}{B} → A
500        // NOTE: consume_args returns tokens in stack order (reversed).
501        // We return them as-is since expand_once does stack.extend(tokens).
502        self.macros.set(
503            "\\@firstoftwo".to_string(),
504            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
505                let args = me.consume_args(2)?;
506                Ok(args.into_iter().next().unwrap())
507            }),
508        );
509
510        // \@secondoftwo{A}{B} → B
511        self.macros.set(
512            "\\@secondoftwo".to_string(),
513            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
514                let args = me.consume_args(2)?;
515                Ok(args.into_iter().nth(1).unwrap())
516            }),
517        );
518
519        // \@ifnextchar{C}{T}{F}: peek; if next non-space == C then T else F
520        self.macros.set(
521            "\\@ifnextchar".to_string(),
522            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
523                let args = me.consume_args(3)?;
524                me.consume_spaces();
525                let next = me.future().text.clone();
526                let char_arg = &args[0];
527                // char_arg is reversed; the "first" char in original order is the last element
528                let char_text = char_arg.first().map_or("", |t| t.text.as_str());
529                if next == char_text {
530                    Ok(args[1].clone())
531                } else {
532                    Ok(args[2].clone())
533                }
534            }),
535        );
536
537        // \@ifstar{with-star}{without-star}: if next is * → consume * and use first arg
538        self.macros.set(
539            "\\@ifstar".to_string(),
540            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
541                let args = me.consume_args(2)?;
542                let next = me.future().text.clone();
543                if next == "*" {
544                    me.pop_token();
545                    Ok(args[0].clone())
546                } else {
547                    Ok(args[1].clone())
548                }
549            }),
550        );
551
552        // \TextOrMath{text-branch}{math-branch}: choose based on mode
553        self.macros.set(
554            "\\TextOrMath".to_string(),
555            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
556                let args = me.consume_args(2)?;
557                if me.mode == Mode::Text {
558                    Ok(args[0].clone())
559                } else {
560                    Ok(args[1].clone())
561                }
562            }),
563        );
564
565        // \html@mathml is registered as a function in htmlmathml.rs
566
567        // \newcommand{\name}[nargs]{body}
568        self.macros.set(
569            "\\newcommand".to_string(),
570            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
571                handle_newcommand(me, false, true)
572            }),
573        );
574
575        // \renewcommand{\name}[nargs]{body}
576        self.macros.set(
577            "\\renewcommand".to_string(),
578            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
579                handle_newcommand(me, true, false)
580            }),
581        );
582
583        // \providecommand{\name}[nargs]{body}
584        self.macros.set(
585            "\\providecommand".to_string(),
586            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
587                handle_newcommand(me, true, true)
588            }),
589        );
590
591        // \char: parse decimal/octal/hex/backtick number → \@char{N}
592        self.macros.set(
593            "\\char".to_string(),
594            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
595                let mut tok = me.pop_token();
596                let mut number: i64;
597                let base: Option<u32>;
598
599                if tok.text == "'" {
600                    base = Some(8);
601                    tok = me.pop_token();
602                } else if tok.text == "\"" {
603                    base = Some(16);
604                    tok = me.pop_token();
605                } else if tok.text == "`" {
606                    tok = me.pop_token();
607                    if tok.text.starts_with('\\') {
608                        number = tok.text.chars().nth(1).map_or(0, |c| c as i64);
609                    } else {
610                        number = tok.text.chars().next().map_or(0, |c| c as i64);
611                    }
612                    // Build \@char{N} tokens in reverse (stack order)
613                    let s = number.to_string();
614                    let loc = tok.loc.clone();
615                    let mut result = vec![Token::new("}", loc.start, loc.end)];
616                    for ch in s.chars().rev() {
617                        result.push(Token::new(ch.to_string(), loc.start, loc.end));
618                    }
619                    result.push(Token::new("{", loc.start, loc.end));
620                    result.push(Token::new("\\@char", loc.start, loc.end));
621                    return Ok(result);
622                } else {
623                    base = Some(10);
624                }
625
626                if let Some(b) = base {
627                    number = i64::from_str_radix(&tok.text, b).unwrap_or(0);
628                    loop {
629                        let next = me.future().text.clone();
630                        if let Ok(d) = i64::from_str_radix(&next, b) {
631                            me.pop_token();
632                            number = number * (b as i64) + d;
633                        } else {
634                            break;
635                        }
636                    }
637                } else {
638                    number = 0;
639                }
640
641                let s = number.to_string();
642                let loc = tok.loc.clone();
643                let mut result = vec![Token::new("}", loc.start, loc.end)];
644                for ch in s.chars().rev() {
645                    result.push(Token::new(ch.to_string(), loc.start, loc.end));
646                }
647                result.push(Token::new("{", loc.start, loc.end));
648                result.push(Token::new("\\@char", loc.start, loc.end));
649                Ok(result)
650            }),
651        );
652
653        // \operatorname: \@ifstar\operatornamewithlimits\operatorname@
654        self.macros.set(
655            "\\operatorname".to_string(),
656            MacroDefinition::Text(
657                "\\@ifstar\\operatornamewithlimits\\operatorname@".to_string(),
658            ),
659        );
660
661        // \message{...}: consume argument and discard (no-op)
662        self.macros.set(
663            "\\message".to_string(),
664            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
665                let _args = me.consume_args(1)?;
666                Ok(vec![])
667            }),
668        );
669
670        // \errmessage{...}: consume argument and discard (no-op)
671        self.macros.set(
672            "\\errmessage".to_string(),
673            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
674                let _args = me.consume_args(1)?;
675                Ok(vec![])
676            }),
677        );
678
679        // KaTeX HTML extensions: no-op (only render content, no HTML attributes).
680        // Not standard LaTeX; for compatibility we parse and expand to second argument only.
681        for name in &["\\htmlClass", "\\htmlData", "\\htmlId", "\\htmlStyle"] {
682            let name = (*name).to_string();
683            self.macros.set(
684                name.clone(),
685                MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
686                    let args = me.consume_args(2)?;
687                    let content = args[1].iter().cloned().rev().collect::<Vec<_>>();
688                    Ok(content)
689                }),
690            );
691        }
692
693        // \bra@ket: like \bra@set but replaces ALL | at depth 0 (for \Braket)
694        self.macros.set(
695            "\\bra@ket".to_string(),
696            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
697                let args = me.consume_args(4)?;
698                let left = args[0].clone();
699                let middle = args[1].clone();
700                let middle_double = args[2].clone();
701                let right = args[3].clone();
702
703                let content = me.consume_args(1)?;
704                let content = content.into_iter().next().unwrap();
705
706                // Convert stack-order (reversed) to logical order, replace all | at depth 0,
707                // then reverse back to stack order.
708                let logical: Vec<Token> = content.into_iter().rev().collect();
709                let mut new_logical: Vec<Token> = Vec::new();
710                let mut depth: i32 = 0;
711                let mut i = 0;
712                while i < logical.len() {
713                    let t = &logical[i];
714                    if t.text == "{" {
715                        depth += 1;
716                        new_logical.push(t.clone());
717                    } else if t.text == "}" {
718                        depth -= 1;
719                        new_logical.push(t.clone());
720                    } else if depth == 0 && t.text == "|" {
721                        // Check for || (double pipe) → middleDouble
722                        if !middle_double.is_empty()
723                            && i + 1 < logical.len()
724                            && logical[i + 1].text == "|"
725                        {
726                            // middle_double is in stack/reversed order; reverse to logical order
727                            new_logical.extend(middle_double.iter().rev().cloned());
728                            i += 2;
729                            continue;
730                        }
731                        // middle is in stack/reversed order; reverse to logical order
732                        new_logical.extend(middle.iter().rev().cloned());
733                    } else {
734                        new_logical.push(t.clone());
735                    }
736                    i += 1;
737                }
738
739                // Reverse back to stack order
740                let content_rev: Vec<Token> = new_logical.into_iter().rev().collect();
741
742                // Build: right + content + left (reversed for stack)
743                let mut to_expand = Vec::new();
744                to_expand.extend(right);
745                to_expand.extend(content_rev);
746                to_expand.extend(left);
747
748                me.begin_group();
749                let expanded = me.expand_tokens(to_expand)?;
750                me.end_group();
751
752                Ok(expanded)
753            }),
754        );
755
756        // \bra@set: braket set notation helper
757        // Only replaces the FIRST | with middle tokens (one-shot), matching KaTeX
758        self.macros.set(
759            "\\bra@set".to_string(),
760            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
761                let args = me.consume_args(4)?;
762                let left = args[0].clone();
763                let middle = args[1].clone();
764                let middle_double = args[2].clone();
765                let right = args[3].clone();
766
767                let content = me.consume_args(1)?;
768                let mut content = content.into_iter().next().unwrap();
769
770                // Scan content and replace only the first | at depth 0
771                // Content tokens are in reversed order (stack), so iterate from end
772                let mut depth: i32 = 0;
773                let mut _first_pipe_idx: Option<usize> = None;
774                // Tokens are reversed (last token first in vec), scan in logical order
775                for i in (0..content.len()).rev() {
776                    let t = &content[i];
777                    if t.text == "{" { depth += 1; }
778                    else if t.text == "}" { depth -= 1; }
779                    else if depth == 0 && t.text == "|" {
780                        // Check for || (double pipe) → middleDouble
781                        if !middle_double.is_empty() && i > 0 && content[i - 1].text == "|" {
782                            _first_pipe_idx = Some(i);
783                            // Replace || with middleDouble
784                            content.remove(i);
785                            content.remove(i - 1);
786                            let insert_at = if i >= 2 { i - 1 } else { 0 };
787                            for (j, tok) in middle_double.iter().enumerate() {
788                                content.insert(insert_at + j, tok.clone());
789                            }
790                            break;
791                        }
792                        _first_pipe_idx = Some(i);
793                        content.remove(i);
794                        for (j, tok) in middle.iter().enumerate() {
795                            content.insert(i + j, tok.clone());
796                        }
797                        break;
798                    }
799                }
800
801                // Build: right + content + left (reversed for stack)
802                let mut to_expand = Vec::new();
803                to_expand.extend(right);
804                to_expand.extend(content);
805                to_expand.extend(left);
806
807                me.begin_group();
808                let expanded = me.expand_tokens(to_expand)?;
809                me.end_group();
810
811                Ok(expanded)
812            }),
813        );
814
815        // \\ce / \\pu: KaTeX mhchem 3.3.0 (Rust port in `crate::mhchem`)
816        self.macros.set(
817            "\\ce".to_string(),
818            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
819                let args = me.consume_args(1)?;
820                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
821                let tex = crate::mhchem::chem_parse_str(&s, "ce")
822                    .map_err(|e| ParseError::msg(format!("\\ce: {e}")))?;
823                Ok(lex_string_to_stack_tokens(&tex))
824            }),
825        );
826        self.macros.set(
827            "\\pu".to_string(),
828            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
829                let args = me.consume_args(1)?;
830                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
831                let tex = crate::mhchem::chem_parse_str(&s, "pu")
832                    .map_err(|e| ParseError::msg(format!("\\pu: {e}")))?;
833                Ok(lex_string_to_stack_tokens(&tex))
834            }),
835        );
836    }
837
838    pub fn set_macro(&mut self, name: String, def: MacroDefinition) {
839        self.macros.set(name, def);
840    }
841
842    pub fn set_macro_global(&mut self, name: String, def: MacroDefinition) {
843        self.macros.set_global(name, def);
844    }
845
846    pub fn set_text_macro(&mut self, name: &str, text: &str) {
847        self.macros.set(
848            name.to_string(),
849            MacroDefinition::Text(text.to_string()),
850        );
851    }
852
853    pub fn get_macro(&self, name: &str) -> Option<&MacroDefinition> {
854        self.macros.get(name)
855    }
856
857    /// Expand a list of tokens fully (for \edef/\xdef).
858    pub fn expand_tokens(&mut self, tokens: Vec<Token>) -> ParseResult<Vec<Token>> {
859        let saved_stack = std::mem::take(&mut self.stack);
860        self.stack = tokens;
861
862        let mut result = Vec::new();
863        loop {
864            if self.stack.is_empty() {
865                break;
866            }
867            let expanded = self.expand_once(false)?;
868            if !expanded {
869                if let Some(tok) = self.stack.pop() {
870                    if tok.is_eof() {
871                        break;
872                    }
873                    result.push(tok);
874                }
875            }
876        }
877
878        self.stack = saved_stack;
879        result.reverse();
880        Ok(result)
881    }
882
883    pub fn switch_mode(&mut self, new_mode: Mode) {
884        self.mode = new_mode;
885    }
886
887    pub fn begin_group(&mut self) {
888        self.macros.begin_group();
889    }
890
891    pub fn end_group(&mut self) {
892        self.macros.end_group();
893    }
894
895    pub fn end_groups(&mut self) {
896        self.macros.end_groups();
897    }
898
899    /// Returns the topmost token on the stack, without expanding it.
900    pub fn future(&mut self) -> &Token {
901        if self.stack.is_empty() {
902            let tok = self.lexer.lex();
903            self.stack.push(tok);
904        }
905        self.stack.last().unwrap()
906    }
907
908    /// Remove and return the next unexpanded token.
909    pub fn pop_token(&mut self) -> Token {
910        self.future();
911        self.stack.pop().unwrap()
912    }
913
914    /// Modify the top token's text on the stack (for \global prefix handling).
915    pub fn set_top_text(&mut self, text: String) {
916        self.future();
917        if let Some(tok) = self.stack.last_mut() {
918            tok.text = text;
919        }
920    }
921
922    /// Push a token onto the stack.
923    pub fn push_token(&mut self, token: Token) {
924        self.stack.push(token);
925    }
926
927    /// Push multiple tokens onto the stack.
928    pub fn push_tokens(&mut self, tokens: Vec<Token>) {
929        self.stack.extend(tokens);
930    }
931
932    /// Consume all following space tokens, without expansion.
933    pub fn consume_spaces(&mut self) {
934        loop {
935            let is_space = self.future().text == " ";
936            if is_space {
937                self.stack.pop();
938            } else {
939                break;
940            }
941        }
942    }
943
944    /// Expand the next token once if possible.
945    /// Returns Ok(true) if expanded, Ok(false) if not expandable.
946    fn expand_once(&mut self, expandable_only: bool) -> ParseResult<bool> {
947        let top_token = self.pop_token();
948        let name = &top_token.text;
949
950        if top_token.noexpand {
951            self.push_token(top_token);
952            return Ok(false);
953        }
954
955        // Check for function-based macro first — always expandable
956        if let Some(MacroDefinition::Function(handler)) = self.macros.get(name).cloned() {
957            self.count_expansion(1)?;
958            let tokens = handler(self)?;
959            self.stack.extend(tokens);
960            return Ok(true);
961        }
962
963        let expansion = self.get_expansion(name);
964        match expansion {
965            None => {
966                if expandable_only && name.starts_with('\\') && !self.is_defined(name) {
967                    return Err(ParseError::new(
968                        format!("Undefined control sequence: {}", name),
969                        Some(&top_token),
970                    ));
971                }
972                self.push_token(top_token);
973                Ok(false)
974            }
975            Some(exp) if expandable_only && exp.unexpandable => {
976                self.push_token(top_token);
977                Ok(false)
978            }
979            Some(exp) => {
980                self.count_expansion(1)?;
981                let mut tokens = exp.tokens;
982                if exp.num_args > 0 {
983                    let args = self.consume_args(exp.num_args)?;
984                    tokens = self.substitute_args(tokens, &args);
985                }
986                self.stack.extend(tokens);
987                Ok(true)
988            }
989        }
990    }
991
992    fn substitute_args(&self, mut tokens: Vec<Token>, args: &[Vec<Token>]) -> Vec<Token> {
993        let mut i = tokens.len();
994        while i > 0 {
995            i -= 1;
996            if tokens[i].text == "#" && i > 0 {
997                let next = &tokens[i - 1];
998                if next.text == "#" {
999                    tokens.remove(i);
1000                    i -= 1;
1001                } else if let Ok(n) = next.text.parse::<usize>() {
1002                    if n >= 1 && n <= args.len() {
1003                        tokens.remove(i);
1004                        tokens.remove(i - 1);
1005                        let arg_tokens = &args[n - 1];
1006                        for (j, t) in arg_tokens.iter().enumerate() {
1007                            tokens.insert(i - 1 + j, t.clone());
1008                        }
1009                        i = i.saturating_sub(1);
1010                    }
1011                }
1012            }
1013        }
1014        tokens
1015    }
1016
1017    fn get_expansion(&self, name: &str) -> Option<MacroExpansion> {
1018        let def = self.macros.get(name)?;
1019
1020        if name.len() == 1 {
1021            let ch = name.chars().next().unwrap();
1022            let catcode = self.lexer_catcode(ch);
1023            if catcode != 0 && catcode != 13 {
1024                return None;
1025            }
1026        }
1027
1028        match def {
1029            MacroDefinition::Text(text) => {
1030                let mut num_args = 0;
1031                let stripped = text.replace("##", "");
1032                while stripped.contains(&format!("#{}", num_args + 1)) {
1033                    num_args += 1;
1034                }
1035                let mut body_lexer = Lexer::new(text);
1036                let mut tokens = Vec::new();
1037                loop {
1038                    let tok = body_lexer.lex();
1039                    if tok.is_eof() {
1040                        break;
1041                    }
1042                    tokens.push(tok);
1043                }
1044                tokens.reverse();
1045                Some(MacroExpansion {
1046                    tokens,
1047                    num_args,
1048                    unexpandable: false,
1049                })
1050            }
1051            MacroDefinition::Tokens { tokens, num_args } => Some(MacroExpansion {
1052                tokens: tokens.clone(),
1053                num_args: *num_args,
1054                unexpandable: false,
1055            }),
1056            MacroDefinition::Function(_) => {
1057                // Signal that this is a function macro; handled in expand_once
1058                Some(MacroExpansion {
1059                    tokens: vec![],
1060                    num_args: 0,
1061                    unexpandable: false,
1062                })
1063            }
1064        }
1065    }
1066
1067    fn lexer_catcode(&self, ch: char) -> u8 {
1068        self.lexer.get_catcode(ch)
1069    }
1070
1071    fn count_expansion(&mut self, amount: usize) -> ParseResult<()> {
1072        self.expansion_count += amount;
1073        if self.expansion_count > self.max_expand {
1074            Err(ParseError::msg(
1075                "Too many expansions: infinite loop or need to increase maxExpand setting",
1076            ))
1077        } else {
1078            Ok(())
1079        }
1080    }
1081
1082    /// Recursively expand the next token until a non-expandable token is found.
1083    pub fn expand_next_token(&mut self) -> ParseResult<Token> {
1084        loop {
1085            let expanded = self.expand_once(false)?;
1086            if !expanded {
1087                let mut token = self.stack.pop().unwrap();
1088                if token.treat_as_relax {
1089                    token.text = "\\relax".to_string();
1090                }
1091                return Ok(token);
1092            }
1093        }
1094    }
1095
1096    /// Consume a single argument from the token stream.
1097    pub fn consume_arg(&mut self, delims: Option<&[&str]>) -> ParseResult<ConsumedArg> {
1098        let is_delimited = delims.is_some_and(|d| !d.is_empty());
1099        if !is_delimited {
1100            self.consume_spaces();
1101        }
1102
1103        let start = self.future().clone();
1104        let mut tokens = Vec::new();
1105        let mut depth: i32 = 0;
1106        let mut end_tok;
1107
1108        loop {
1109            let tok = self.pop_token();
1110            end_tok = tok.clone();
1111            tokens.push(tok.clone());
1112
1113            if tok.text == "{" {
1114                depth += 1;
1115            } else if tok.text == "}" {
1116                depth -= 1;
1117                if depth == -1 {
1118                    return Err(ParseError::new("Extra }", Some(&tok)));
1119                }
1120            } else if tok.is_eof() {
1121                return Err(ParseError::new(
1122                    "Unexpected end of input in a macro argument",
1123                    Some(&tok),
1124                ));
1125            }
1126
1127            if depth == 0 && !is_delimited {
1128                break;
1129            }
1130
1131            if let Some(delims) = delims {
1132                if is_delimited && depth == 0 {
1133                    if let Some(last) = delims.last() {
1134                        if tok.text == *last {
1135                            tokens.pop();
1136                            break;
1137                        }
1138                    }
1139                }
1140            }
1141        }
1142
1143        if start.text == "{" && tokens.last().is_some_and(|t| t.text == "}") {
1144            tokens.pop();
1145            tokens.remove(0);
1146        }
1147
1148        tokens.reverse();
1149
1150        Ok(ConsumedArg {
1151            tokens,
1152            start,
1153            end: end_tok,
1154        })
1155    }
1156
1157    /// Consume N arguments.
1158    fn consume_args(&mut self, num_args: usize) -> ParseResult<Vec<Vec<Token>>> {
1159        let mut args = Vec::with_capacity(num_args);
1160        for _ in 0..num_args {
1161            let arg = self.consume_arg(None)?;
1162            args.push(arg.tokens);
1163        }
1164        Ok(args)
1165    }
1166
1167    /// Scan a function argument (optional or mandatory).
1168    /// Pushes an EOF token to mark the end, then pushes the argument tokens.
1169    pub fn scan_argument(&mut self, is_optional: bool) -> ParseResult<Option<Token>> {
1170        if is_optional {
1171            self.consume_spaces();
1172            if self.future().text != "[" {
1173                return Ok(None);
1174            }
1175            let start = self.pop_token();
1176            let arg = self.consume_arg(Some(&["]"]))?;
1177            let end = &arg.end;
1178            let end_loc = end.loc.clone();
1179
1180            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1181            self.push_tokens(arg.tokens);
1182
1183            let result = Token {
1184                text: String::new(),
1185                loc: SourceLocation::range(&start.loc, &end_loc),
1186                noexpand: false,
1187                treat_as_relax: false,
1188            };
1189            Ok(Some(result))
1190        } else {
1191            let arg = self.consume_arg(None)?;
1192            let end_loc = arg.end.loc.clone();
1193
1194            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1195            self.push_tokens(arg.tokens);
1196
1197            let result = Token {
1198                text: String::new(),
1199                loc: SourceLocation::range(&arg.start.loc, &end_loc),
1200                noexpand: false,
1201                treat_as_relax: false,
1202            };
1203            Ok(Some(result))
1204        }
1205    }
1206
1207    /// Check if a command name is currently defined.
1208    pub fn is_defined(&self, name: &str) -> bool {
1209        self.macros.has(name)
1210            || FUNCTIONS.contains_key(name)
1211            || is_known_symbol(name)
1212            || IMPLICIT_COMMANDS.contains(&name)
1213    }
1214
1215    /// Check if a command is expandable.
1216    pub fn is_expandable(&self, name: &str) -> bool {
1217        if let Some(_def) = self.macros.get(name) {
1218            return true;
1219        }
1220        if let Some(func) = FUNCTIONS.get(name) {
1221            return !func.primitive;
1222        }
1223        false
1224    }
1225}
1226
1227pub struct ConsumedArg {
1228    pub tokens: Vec<Token>,
1229    pub start: Token,
1230    pub end: Token,
1231}
1232
1233fn handle_newcommand(
1234    me: &mut MacroExpander,
1235    exists_ok: bool,
1236    nonexists_ok: bool,
1237) -> ParseResult<Vec<Token>> {
1238    let name_arg = me.consume_arg(None)?;
1239    // name_arg.tokens is reversed (stack order); last element = first token in original
1240    let name = name_arg.tokens.last().map_or_else(String::new, |t| t.text.clone());
1241
1242    let exists = me.is_defined(&name);
1243    if exists && !exists_ok {
1244        return Err(ParseError::msg(format!(
1245            "\\newcommand{{{}}} attempting to redefine {}; use \\renewcommand",
1246            name, name
1247        )));
1248    }
1249    if !exists && !nonexists_ok {
1250        return Err(ParseError::msg(format!(
1251            "\\renewcommand{{{}}} when command {} does not yet exist; use \\newcommand",
1252            name, name
1253        )));
1254    }
1255
1256    me.consume_spaces();
1257    let mut num_args = 0usize;
1258    if me.future().text == "[" {
1259        me.pop_token();
1260        let narg_tok = me.pop_token();
1261        num_args = narg_tok.text.parse().unwrap_or(0);
1262        let close = me.pop_token();
1263        if close.text != "]" {
1264            return Err(ParseError::msg("Expected ] in \\newcommand"));
1265        }
1266    }
1267
1268    let body_arg = me.consume_arg(None)?;
1269    let tokens = body_arg.tokens;
1270
1271    me.set_macro(name, MacroDefinition::Tokens { tokens, num_args });
1272    Ok(vec![])
1273}
1274
1275fn is_known_symbol(name: &str) -> bool {
1276    use ratex_font::symbols;
1277    symbols::get_symbol(name, symbols::Mode::Math).is_some()
1278        || symbols::get_symbol(name, symbols::Mode::Text).is_some()
1279}