Skip to main content

ratex_parser/
macro_expander.rs

1use std::collections::HashMap;
2
3use ratex_lexer::token::{SourceLocation, Token};
4use ratex_lexer::Lexer;
5
6use crate::error::{ParseError, ParseResult};
7use crate::functions::FUNCTIONS;
8use crate::parse_node::Mode;
9
/// Command names that act like macros but aren't defined as a macro, function, or symbol:
/// `^`, `_`, `\limits` and `\nolimits`.
///
/// Used in `is_defined`.
pub static IMPLICIT_COMMANDS: &[&str] = &["^", "_", "\\limits", "\\nolimits"];
13
/// Handler type for function-based macros (e.g. \TextOrMath, \@ifstar).
///
/// The handler takes the [`MacroExpander`] mutably, so it may consume further input
/// itself, and returns the tokens to push onto the expander's stack. On failure it
/// returns the `ParseResult` error.
pub type FnMacroHandler = fn(&mut MacroExpander) -> ParseResult<Vec<Token>>;
17
/// A macro definition: string template, token list, or function.
///
/// `Debug` is implemented by hand for this type rather than derived.
#[derive(Clone)]
pub enum MacroDefinition {
    /// Simple string expansion (e.g., `\def\foo{bar}` → "bar").
    /// The body is kept as source text; no argument count is stored alongside it.
    Text(String),
    /// Pre-tokenized expansion with argument count
    Tokens {
        /// The tokens of the expansion body.
        // NOTE(review): presumably held in stack (reversed) order like other token
        // lists in this file — confirm against the expansion code.
        tokens: Vec<Token>,
        /// Number of arguments the macro takes.
        num_args: usize,
    },
    /// Function-based macro (consumes tokens directly, returns expansion)
    Function(FnMacroHandler),
}
31
32impl std::fmt::Debug for MacroDefinition {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            Self::Text(s) => write!(f, "Text({:?})", s),
36            Self::Tokens { tokens, num_args } => {
37                write!(f, "Tokens {{ {} tokens, {} args }}", tokens.len(), num_args)
38            }
39            Self::Function(_) => write!(f, "Function(...)"),
40        }
41    }
42}
43
/// Result of expanding a macro once.
struct MacroExpansion {
    /// Tokens produced by the expansion.
    // NOTE(review): presumably in stack (reversed) order — confirm against the caller.
    tokens: Vec<Token>,
    /// Number of arguments the expanded macro takes.
    num_args: usize,
    // NOTE(review): presumably marks an expansion that must not be expanded further;
    // its use is not visible in this part of the file — confirm.
    unexpandable: bool,
}
50
/// The MacroExpander (or "gullet") manages macro expansion.
///
/// It sits between the Lexer (mouth) and the Parser (stomach).
/// Tokens are read from the lexer, pushed onto an internal stack,
/// and macros are expanded until only non-expandable tokens remain.
///
/// Modeled after KaTeX's MacroExpander.ts.
pub struct MacroExpander<'a> {
    /// Lexer over the input string; the source of raw tokens.
    pub lexer: Lexer<'a>,
    /// Current mode; `\TextOrMath` picks its branch by comparing this to `Mode::Text`.
    pub mode: Mode,
    /// Internal stack of pending tokens (starts empty).
    stack: Vec<Token>,
    /// Macro definitions known to this expander, with group scoping.
    macros: MacroNamespace,
    /// Starts at 0.
    // NOTE(review): presumably counts expansions performed so far — confirm.
    expansion_count: usize,
    /// Initialised to 1000 by `new`.
    // NOTE(review): presumably the upper bound for `expansion_count` — confirm.
    max_expand: usize,
}
66
/// Scoped macro namespace supporting group nesting.
struct MacroNamespace {
    /// Definitions currently in effect, keyed by macro name.
    current: HashMap<String, MacroDefinition>,
    /// One undo map per open group, innermost last. Each entry holds the value a name
    /// had before its first local change inside that group (`None` means the name was
    /// undefined), so closing the group can restore it.
    group_stack: Vec<HashMap<String, Option<MacroDefinition>>>,
}
72
73impl MacroNamespace {
74    fn new() -> Self {
75        Self {
76            current: HashMap::new(),
77            group_stack: Vec::new(),
78        }
79    }
80
81    fn get(&self, name: &str) -> Option<&MacroDefinition> {
82        self.current.get(name)
83    }
84
85    fn set(&mut self, name: String, def: MacroDefinition) {
86        if let Some(undo) = self.group_stack.last_mut() {
87            undo.entry(name.clone()).or_insert_with(|| self.current.get(&name).cloned());
88        }
89        self.current.insert(name, def);
90    }
91
92    fn set_global(&mut self, name: String, def: MacroDefinition) {
93        self.current.insert(name, def);
94    }
95
96    fn has(&self, name: &str) -> bool {
97        self.current.contains_key(name)
98    }
99
100    fn begin_group(&mut self) {
101        self.group_stack.push(HashMap::new());
102    }
103
104    fn end_group(&mut self) {
105        if let Some(undo) = self.group_stack.pop() {
106            for (name, old_val) in undo {
107                match old_val {
108                    Some(def) => { self.current.insert(name, def); }
109                    None => { self.current.remove(&name); }
110                }
111            }
112        }
113    }
114
115    fn end_groups(&mut self) {
116        while !self.group_stack.is_empty() {
117            self.end_group();
118        }
119    }
120}
121
122/// Tokenize a macro expansion string into stack order (same as [`MacroDefinition::Text`] bodies).
123fn lex_string_to_stack_tokens(text: &str) -> Vec<Token> {
124    let mut body_lexer = Lexer::new(text);
125    let mut tokens = Vec::new();
126    loop {
127        let tok = body_lexer.lex();
128        if tok.is_eof() {
129            break;
130        }
131        tokens.push(tok);
132    }
133    tokens.reverse();
134    tokens
135}
136
137impl<'a> MacroExpander<'a> {
138    pub fn new(input: &'a str, mode: Mode) -> Self {
139        let mut me = Self {
140            lexer: Lexer::new(input),
141            mode,
142            stack: Vec::new(),
143            macros: MacroNamespace::new(),
144            expansion_count: 0,
145            max_expand: 1000,
146        };
147        me.load_builtins();
148        me
149    }
150
151    fn load_builtins(&mut self) {
152        let builtins: &[(&str, &str)] = &[
153            // ── Grouping ──
154            ("\\bgroup", "{"),
155            ("\\egroup", "}"),
156
157            // ── Symbols from latex.ltx ──
158            ("\\lq", "`"),
159            ("\\rq", "'"),
160            // \lbrack and \rbrack are in the symbol table directly
161            ("\\aa", "\\r a"),
162            ("\\AA", "\\r A"),
163
164            // ── Active characters ──
165            ("~", "\\nobreakspace"),
166
167            // ── Phantoms ──
168            ("\\hphantom", "\\smash{\\phantom{#1}}"),
169
170            // ── Negated symbols ──
171            ("\\not", "\\html@mathml{\\mathrel{\\mathrlap\\@not}\\nobreak}{\\char\"338}"),
172            ("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}"),
173            ("\\ne", "\\neq"),
174            ("\u{2260}", "\\neq"),
175            ("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}{\\mathrel{\\char`∉}}"),
176            ("\u{2209}", "\\notin"),
177            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
178            ("\u{220C}", "\\notni"),
179            // \le and \ge are in the symbol table directly, not macros
180
181            // ── amsmath iff/implies ──
182            ("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"),
183            ("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"),
184            ("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"),
185
186            // ── Italic Greek capitals ──
187            ("\\varGamma", "\\mathit{\\Gamma}"),
188            ("\\varDelta", "\\mathit{\\Delta}"),
189            ("\\varTheta", "\\mathit{\\Theta}"),
190            ("\\varLambda", "\\mathit{\\Lambda}"),
191            ("\\varXi", "\\mathit{\\Xi}"),
192            ("\\varPi", "\\mathit{\\Pi}"),
193            ("\\varSigma", "\\mathit{\\Sigma}"),
194            ("\\varUpsilon", "\\mathit{\\Upsilon}"),
195            ("\\varPhi", "\\mathit{\\Phi}"),
196            ("\\varPsi", "\\mathit{\\Psi}"),
197            ("\\varOmega", "\\mathit{\\Omega}"),
198
199            // ── Spacing (mode-aware via \TextOrMath) ──
200            ("\\,", "\\TextOrMath{\\kern{.1667em}}{\\mskip{3mu}}"),
201            ("\\thinspace", "\\,"),
202            ("\\>", "\\mskip{4mu}"),
203            ("\\:", "\\TextOrMath{\\kern{.2222em}}{\\mskip{4mu}}"),
204            ("\\medspace", "\\:"),
205            ("\\;", "\\TextOrMath{\\kern{.2777em}}{\\mskip{5mu}}"),
206            ("\\thickspace", "\\;"),
207            ("\\!", "\\TextOrMath{\\kern{-.1667em}}{\\mskip{-3mu}}"),
208            ("\\negthinspace", "\\!"),
209            ("\\negmedspace", "\\TextOrMath{\\kern{-.2222em}}{\\mskip{-4mu}}"),
210            ("\\negthickspace", "\\TextOrMath{\\kern{-.2777em}}{\\mskip{-5mu}}"),
211            ("\\enspace", "\\kern.5em "),
212            ("\\enskip", "\\hskip.5em\\relax"),
213            ("\\quad", "\\hskip1em\\relax"),
214            ("\\qquad", "\\hskip2em\\relax"),
215
216            // ── Newline ──
217            ("\\newline", "\\\\\\relax"),
218
219            // ── hspace ──
220            ("\\@hspace", "\\hskip #1\\relax"),
221            ("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"),
222
223            // ── llap / rlap / clap ──
224            ("\\llap", "\\mathllap{\\textrm{#1}}"),
225            ("\\rlap", "\\mathrlap{\\textrm{#1}}"),
226            ("\\clap", "\\mathclap{\\textrm{#1}}"),
227
228            // ── Logos ──
229            ("\\TeX", "\\textrm{\\html@mathml{T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX}{TeX}}"),
230            ("\\LaTeX", "\\textrm{\\html@mathml{L\\kern-.36em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{LaTeX}}"),
231            ("\\KaTeX", "\\textrm{\\html@mathml{K\\kern-.17em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{KaTeX}}"),
232
233            // ── imath / jmath ──
234            ("\\imath", "\\html@mathml{\\@imath}{\u{0131}}"),
235            ("\\jmath", "\\html@mathml{\\@jmath}{\u{0237}}"),
236
237            // ── minuso ──
238            ("\\minuso", "\\mathbin{\\html@mathml{{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}{\\char`\u{29B5}}}"),
239            ("\\clap", "\\mathclap{\\textrm{#1}}"),
240
241            // ── mathstrut / underbar ──
242            ("\\mathstrut", "\\vphantom{(}"),
243            ("\\underbar", "\\underline{\\text{#1}}"),
244
245            // ── Bbbk ──
246            ("\\Bbbk", "\\Bbb{k}"),
247
248            // ── substack ──
249            ("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"),
250
251            // ── boxed ──
252            ("\\boxed", "\\fbox{$\\displaystyle{#1}$}"),
253
254            // ── colon ──
255            ("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"),
256
257            // ── dots (string-based) ──
258            ("\\dots", "\\cdots"),
259            ("\\cdots", "\\@cdots"),
260            ("\\dotsb", "\\cdots"),
261            ("\\dotsm", "\\cdots"),
262            ("\\dotsi", "\\!\\cdots"),
263            ("\\dotsx", "\\ldots\\,"),
264            ("\\dotsc", "\\ldots"),  // comma list: x,\dotsc,y
265            ("\\dotso", "\\ldots"),  // other
266            ("\\DOTSI", "\\relax"),
267            ("\\DOTSB", "\\relax"),
268            ("\\DOTSX", "\\relax"),
269
270            // ── negated relations / corners (→ symbol table \@xxx) ──
271            ("\\gvertneqq", "\\@gvertneqq"),
272            ("\\lvertneqq", "\\@lvertneqq"),
273            ("\\ngeqq", "\\@ngeqq"),
274            ("\\ngeqslant", "\\@ngeqslant"),
275            ("\\nleqq", "\\@nleqq"),
276            ("\\nleqslant", "\\@nleqslant"),
277            ("\\nshortmid", "\\@nshortmid"),
278            ("\\nshortparallel", "\\@nshortparallel"),
279            ("\\nsubseteqq", "\\@nsubseteqq"),
280            ("\\nsupseteqq", "\\@nsupseteqq"),
281            ("\\ulcorner", "\\@ulcorner"),
282            ("\\urcorner", "\\@urcorner"),
283            ("\\llcorner", "\\@llcorner"),
284            ("\\lrcorner", "\\@lrcorner"),
285            ("\\varsubsetneq", "\\@varsubsetneq"),
286            ("\\varsubsetneqq", "\\@varsubsetneqq"),
287            ("\\varsupsetneq", "\\@varsupsetneq"),
288            ("\\varsupsetneqq", "\\@varsupsetneqq"),
289
290            // ── delimiters / text (compose from existing) ──
291            // Match KaTeX `macros.ts` html@mathml first branch (STIX-style white tortoise brackets).
292            ("\\lBrace", "\\mathopen{\\{\\mkern-3.2mu[}"),
293            ("\\rBrace", "\\mathclose{]\\mkern-3.2mu\\}}"),
294            ("\\llbracket", "\\mathopen{[\\mkern-3.2mu[}"),
295            ("\\rrbracket", "\\mathclose{]\\mkern-3.2mu]}"),
296            ("\\copyright", "\\textcircled{c}"),
297            ("\\textregistered", "\\textcircled{\\scriptsize R}"),
298
299            // ── dddot / ddddot ──
300            ("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"),
301            ("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"),
302
303            // ── vdots ──
304            ("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}"),
305            ("\u{22ee}", "\\vdots"),
306
307            // ── bmod / pod / pmod / mod ──
308            ("\\bmod", "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}\\mathbin{\\rm mod}\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}"),
309            ("\\pod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)"),
310            ("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}"),
311            ("\\mod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}{\\rm mod}\\,\\,#1"),
312
313            // ── limsup / liminf / etc ──
314            ("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"),
315            ("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"),
316            ("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"),
317            ("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"),
318            ("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{\\mathrm{lim}}}"),
319            ("\\varliminf", "\\DOTSB\\operatorname*{\\underline{\\mathrm{lim}}}"),
320            ("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{\\mathrm{lim}}}"),
321            ("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{\\mathrm{lim}}}"),
322
323            // ── statmath ──
324            ("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"),
325            ("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"),
326            ("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"),
327
328            // ── mathtools colon variants ──
329            ("\\ordinarycolon", ":"),
330            ("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"),
331            ("\\dblcolon", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}{\\mathop{\\char\"2237}}"),
332            ("\\coloneqq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2254}}"),
333            ("\\Coloneqq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2237\\char\"3d}}"),
334            ("\\coloneq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"3a\\char\"2212}}"),
335            ("\\Coloneq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"2237\\char\"2212}}"),
336            ("\\eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2255}}"),
337            ("\\Eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"3d\\char\"2237}}"),
338            ("\\eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2239}}"),
339            ("\\Eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"2212\\char\"2237}}"),
340            ("\\colonapprox", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"3a\\char\"2248}}"),
341            ("\\Colonapprox", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"2237\\char\"2248}}"),
342            ("\\colonsim", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"3a\\char\"223c}}"),
343            ("\\Colonsim", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"2237\\char\"223c}}"),
344
345            // ── colonequals alternate names ──
346            ("\\ratio", "\\vcentcolon"),
347            ("\\coloncolon", "\\dblcolon"),
348            ("\\colonequals", "\\coloneqq"),
349            ("\\coloncolonequals", "\\Coloneqq"),
350            ("\\equalscolon", "\\eqqcolon"),
351            ("\\equalscoloncolon", "\\Eqqcolon"),
352            ("\\colonminus", "\\coloneq"),
353            ("\\coloncolonminus", "\\Coloneq"),
354            ("\\minuscolon", "\\eqcolon"),
355            ("\\minuscoloncolon", "\\Eqcolon"),
356            ("\\coloncolonapprox", "\\Colonapprox"),
357            ("\\coloncolonsim", "\\Colonsim"),
358            ("\\simcolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
359            ("\\simcoloncolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
360            ("\\approxcolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
361            ("\\approxcoloncolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
362
363            // ── braket (string-based) ──
364            ("\\bra", "\\mathinner{\\langle{#1}|}"),
365            ("\\ket", "\\mathinner{|{#1}\\rangle}"),
366            ("\\braket", "\\mathinner{\\langle{#1}\\rangle}"),
367            ("\\Braket", "\\bra@ket{\\left\\langle}{\\,\\middle\\vert\\,}{\\,\\middle\\vert\\,}{\\right\\rangle}"),
368            ("\\Bra", "\\left\\langle#1\\right|"),
369            ("\\Ket", "\\left|#1\\right\\rangle"),
370
371            // ── texvc (MediaWiki) ──
372            ("\\darr", "\\downarrow"),
373            ("\\dArr", "\\Downarrow"),
374            ("\\Darr", "\\Downarrow"),
375            ("\\lang", "\\langle"),
376            ("\\rang", "\\rangle"),
377            ("\\uarr", "\\uparrow"),
378            ("\\uArr", "\\Uparrow"),
379            ("\\Uarr", "\\Uparrow"),
380            ("\\N", "\\mathbb{N}"),
381            ("\\R", "\\mathbb{R}"),
382            ("\\Z", "\\mathbb{Z}"),
383            ("\\alef", "\\aleph"),
384            ("\\alefsym", "\\aleph"),
385            ("\\Alpha", "\\mathrm{A}"),
386            ("\\Beta", "\\mathrm{B}"),
387            ("\\bull", "\\bullet"),
388            ("\\Chi", "\\mathrm{X}"),
389            ("\\clubs", "\\clubsuit"),
390            ("\\cnums", "\\mathbb{C}"),
391            ("\\Complex", "\\mathbb{C}"),
392            ("\\Dagger", "\\ddagger"),
393            ("\\diamonds", "\\diamondsuit"),
394            ("\\empty", "\\emptyset"),
395            ("\\Epsilon", "\\mathrm{E}"),
396            ("\\Eta", "\\mathrm{H}"),
397            ("\\exist", "\\exists"),
398            ("\\harr", "\\leftrightarrow"),
399            ("\\hArr", "\\Leftrightarrow"),
400            ("\\Harr", "\\Leftrightarrow"),
401            ("\\hearts", "\\heartsuit"),
402            ("\\image", "\\Im"),
403            ("\\infin", "\\infty"),
404            ("\\Iota", "\\mathrm{I}"),
405            ("\\isin", "\\in"),
406            ("\\Kappa", "\\mathrm{K}"),
407            ("\\larr", "\\leftarrow"),
408            ("\\lArr", "\\Leftarrow"),
409            ("\\Larr", "\\Leftarrow"),
410            ("\\lrarr", "\\leftrightarrow"),
411            ("\\lrArr", "\\Leftrightarrow"),
412            ("\\Lrarr", "\\Leftrightarrow"),
413            ("\\Mu", "\\mathrm{M}"),
414            ("\\natnums", "\\mathbb{N}"),
415            ("\\Nu", "\\mathrm{N}"),
416            ("\\Omicron", "\\mathrm{O}"),
417            ("\\plusmn", "\\pm"),
418            ("\\rarr", "\\rightarrow"),
419            ("\\rArr", "\\Rightarrow"),
420            ("\\Rarr", "\\Rightarrow"),
421            ("\\real", "\\Re"),
422            ("\\reals", "\\mathbb{R}"),
423            ("\\Reals", "\\mathbb{R}"),
424            ("\\Rho", "\\mathrm{P}"),
425            ("\\sdot", "\\cdot"),
426            ("\\sect", "\\S"),
427            ("\\spades", "\\spadesuit"),
428            ("\\sub", "\\subset"),
429            ("\\sube", "\\subseteq"),
430            ("\\supe", "\\supseteq"),
431            ("\\Tau", "\\mathrm{T}"),
432            ("\\thetasym", "\\vartheta"),
433            ("\\weierp", "\\wp"),
434            ("\\Zeta", "\\mathrm{Z}"),
435
436            // ── Khan Academy color aliases ──
437            ("\\blue", "\\textcolor{##6495ed}{#1}"),
438            ("\\orange", "\\textcolor{##ffa500}{#1}"),
439            ("\\pink", "\\textcolor{##ff00af}{#1}"),
440            ("\\red", "\\textcolor{##df0030}{#1}"),
441            ("\\green", "\\textcolor{##28ae7b}{#1}"),
442            ("\\gray", "\\textcolor{gray}{#1}"),
443            ("\\purple", "\\textcolor{##9d38bd}{#1}"),
444
445            // ── Unicode script letters ──
446            ("\u{212C}", "\\mathscr{B}"),
447            ("\u{2130}", "\\mathscr{E}"),
448            ("\u{2131}", "\\mathscr{F}"),
449            ("\u{210B}", "\\mathscr{H}"),
450            ("\u{2110}", "\\mathscr{I}"),
451            ("\u{2112}", "\\mathscr{L}"),
452            ("\u{2133}", "\\mathscr{M}"),
453            ("\u{211B}", "\\mathscr{R}"),
454            ("\u{212D}", "\\mathfrak{C}"),
455            ("\u{210C}", "\\mathfrak{H}"),
456            ("\u{2128}", "\\mathfrak{Z}"),
457
458            // ── notni ──
459            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
460
461            // ── actuarialangle ──
462            ("\\angln", "{\\angl n}"),
463
464            // ── set/Set (braket notation, simplified) ──
465            ("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"),
466            ("\\Set", "\\bra@set{\\left\\{\\:}{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"),
467
468            // ── KaTeX mhchem (\\tripledash for \\bond ~ forms) ──
469            (
470                "\\tripledash",
471                "{\\vphantom{-}\\raisebox{2.56mu}{$\\mkern2mu\\tiny\\text{-}\\mkern1mu\\text{-}\\mkern1mu\\text{-}\\mkern2mu$}}",
472            ),
473        ];
474
475        for &(name, expansion) in builtins {
476            self.macros.set(
477                name.to_string(),
478                MacroDefinition::Text(expansion.to_string()),
479            );
480        }
481
482        self.load_function_macros();
483    }
484
485    fn load_function_macros(&mut self) {
486        // \noexpand: mark the next token as non-expandable (only if expandable)
487        self.macros.set(
488            "\\noexpand".to_string(),
489            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
490                let mut tok = me.pop_token();
491                if me.is_expandable(&tok.text) {
492                    tok.noexpand = true;
493                    tok.treat_as_relax = true;
494                }
495                Ok(vec![tok])
496            }),
497        );
498
499        // \@firstoftwo{A}{B} → A
500        // NOTE: consume_args returns tokens in stack order (reversed).
501        // We return them as-is since expand_once does stack.extend(tokens).
502        self.macros.set(
503            "\\@firstoftwo".to_string(),
504            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
505                let args = me.consume_args(2)?;
506                Ok(args.into_iter().next().unwrap())
507            }),
508        );
509
510        // \@secondoftwo{A}{B} → B
511        self.macros.set(
512            "\\@secondoftwo".to_string(),
513            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
514                let args = me.consume_args(2)?;
515                Ok(args.into_iter().nth(1).unwrap())
516            }),
517        );
518
519        // \@ifnextchar{C}{T}{F}: peek; if next non-space == C then T else F
520        self.macros.set(
521            "\\@ifnextchar".to_string(),
522            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
523                let args = me.consume_args(3)?;
524                me.consume_spaces();
525                let next = me.future().text.clone();
526                let char_arg = &args[0];
527                // char_arg is reversed; the "first" char in original order is the last element
528                let char_text = char_arg.first().map_or("", |t| t.text.as_str());
529                if next == char_text {
530                    Ok(args[1].clone())
531                } else {
532                    Ok(args[2].clone())
533                }
534            }),
535        );
536
537        // \@ifstar{with-star}{without-star}: if next is * → consume * and use first arg
538        self.macros.set(
539            "\\@ifstar".to_string(),
540            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
541                let args = me.consume_args(2)?;
542                let next = me.future().text.clone();
543                if next == "*" {
544                    me.pop_token();
545                    Ok(args[0].clone())
546                } else {
547                    Ok(args[1].clone())
548                }
549            }),
550        );
551
552        // \TextOrMath{text-branch}{math-branch}: choose based on mode
553        self.macros.set(
554            "\\TextOrMath".to_string(),
555            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
556                let args = me.consume_args(2)?;
557                if me.mode == Mode::Text {
558                    Ok(args[0].clone())
559                } else {
560                    Ok(args[1].clone())
561                }
562            }),
563        );
564
565        // \html@mathml is registered as a function in htmlmathml.rs
566
567        // \newcommand{\name}[nargs]{body}
568        self.macros.set(
569            "\\newcommand".to_string(),
570            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
571                handle_newcommand(me, false, true)
572            }),
573        );
574
575        // \renewcommand{\name}[nargs]{body}
576        self.macros.set(
577            "\\renewcommand".to_string(),
578            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
579                handle_newcommand(me, true, false)
580            }),
581        );
582
583        // \providecommand{\name}[nargs]{body}
584        self.macros.set(
585            "\\providecommand".to_string(),
586            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
587                handle_newcommand(me, true, true)
588            }),
589        );
590
591        // \char: parse decimal/octal/hex/backtick number → \@char{N}
592        self.macros.set(
593            "\\char".to_string(),
594            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
595                let mut tok = me.pop_token();
596                let mut number: i64;
597                let base: Option<u32>;
598
599                if tok.text == "'" {
600                    base = Some(8);
601                    tok = me.pop_token();
602                } else if tok.text == "\"" {
603                    base = Some(16);
604                    tok = me.pop_token();
605                } else if tok.text == "`" {
606                    tok = me.pop_token();
607                    if tok.text.starts_with('\\') {
608                        number = tok.text.chars().nth(1).map_or(0, |c| c as i64);
609                    } else {
610                        number = tok.text.chars().next().map_or(0, |c| c as i64);
611                    }
612                    // Build \@char{N} tokens in reverse (stack order)
613                    let s = number.to_string();
614                    let loc = tok.loc.clone();
615                    let mut result = vec![Token::new("}", loc.start, loc.end)];
616                    for ch in s.chars().rev() {
617                        result.push(Token::new(ch.to_string(), loc.start, loc.end));
618                    }
619                    result.push(Token::new("{", loc.start, loc.end));
620                    result.push(Token::new("\\@char", loc.start, loc.end));
621                    return Ok(result);
622                } else {
623                    base = Some(10);
624                }
625
626                if let Some(b) = base {
627                    number = i64::from_str_radix(&tok.text, b).unwrap_or(0);
628                    loop {
629                        let next = me.future().text.clone();
630                        if let Ok(d) = i64::from_str_radix(&next, b) {
631                            me.pop_token();
632                            number = number * (b as i64) + d;
633                        } else {
634                            break;
635                        }
636                    }
637                } else {
638                    number = 0;
639                }
640
641                let s = number.to_string();
642                let loc = tok.loc.clone();
643                let mut result = vec![Token::new("}", loc.start, loc.end)];
644                for ch in s.chars().rev() {
645                    result.push(Token::new(ch.to_string(), loc.start, loc.end));
646                }
647                result.push(Token::new("{", loc.start, loc.end));
648                result.push(Token::new("\\@char", loc.start, loc.end));
649                Ok(result)
650            }),
651        );
652
653        // \operatorname: \@ifstar\operatornamewithlimits\operatorname@
654        self.macros.set(
655            "\\operatorname".to_string(),
656            MacroDefinition::Text(
657                "\\@ifstar\\operatornamewithlimits\\operatorname@".to_string(),
658            ),
659        );
660
661        // \message{...}: consume argument and discard (no-op)
662        self.macros.set(
663            "\\message".to_string(),
664            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
665                let _args = me.consume_args(1)?;
666                Ok(vec![])
667            }),
668        );
669
670        // \errmessage{...}: consume argument and discard (no-op)
671        self.macros.set(
672            "\\errmessage".to_string(),
673            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
674                let _args = me.consume_args(1)?;
675                Ok(vec![])
676            }),
677        );
678
679        // KaTeX HTML extensions: no-op (only render content, no HTML attributes).
680        // Not standard LaTeX; for compatibility we parse and expand to second argument only.
681        // \htmlStyle is registered as a real function so the renderer can honor basic CSS.
682        for name in &["\\htmlClass", "\\htmlData", "\\htmlId"] {
683            let name = (*name).to_string();
684            self.macros.set(
685                name.clone(),
686                MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
687                    let args = me.consume_args(2)?;
688                    let content = args[1].iter().cloned().rev().collect::<Vec<_>>();
689                    Ok(content)
690                }),
691            );
692        }
693
694        // \bra@ket: like \bra@set but replaces ALL | at depth 0 (for \Braket)
695        self.macros.set(
696            "\\bra@ket".to_string(),
697            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
698                let args = me.consume_args(4)?;
699                let left = args[0].clone();
700                let middle = args[1].clone();
701                let middle_double = args[2].clone();
702                let right = args[3].clone();
703
704                let content = me.consume_args(1)?;
705                let content = content.into_iter().next().unwrap();
706
707                // Convert stack-order (reversed) to logical order, replace all | at depth 0,
708                // then reverse back to stack order.
709                let logical: Vec<Token> = content.into_iter().rev().collect();
710                let mut new_logical: Vec<Token> = Vec::new();
711                let mut depth: i32 = 0;
712                let mut i = 0;
713                while i < logical.len() {
714                    let t = &logical[i];
715                    if t.text == "{" {
716                        depth += 1;
717                        new_logical.push(t.clone());
718                    } else if t.text == "}" {
719                        depth -= 1;
720                        new_logical.push(t.clone());
721                    } else if depth == 0 && t.text == "|" {
722                        // Check for || (double pipe) → middleDouble
723                        if !middle_double.is_empty()
724                            && i + 1 < logical.len()
725                            && logical[i + 1].text == "|"
726                        {
727                            // middle_double is in stack/reversed order; reverse to logical order
728                            new_logical.extend(middle_double.iter().rev().cloned());
729                            i += 2;
730                            continue;
731                        }
732                        // middle is in stack/reversed order; reverse to logical order
733                        new_logical.extend(middle.iter().rev().cloned());
734                    } else {
735                        new_logical.push(t.clone());
736                    }
737                    i += 1;
738                }
739
740                // Reverse back to stack order
741                let content_rev: Vec<Token> = new_logical.into_iter().rev().collect();
742
743                // Build: right + content + left (reversed for stack)
744                let mut to_expand = Vec::new();
745                to_expand.extend(right);
746                to_expand.extend(content_rev);
747                to_expand.extend(left);
748
749                me.begin_group();
750                let expanded = me.expand_tokens(to_expand)?;
751                me.end_group();
752
753                Ok(expanded)
754            }),
755        );
756
757        // \bra@set: braket set notation helper
758        // Only replaces the FIRST | with middle tokens (one-shot), matching KaTeX
759        self.macros.set(
760            "\\bra@set".to_string(),
761            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
762                let args = me.consume_args(4)?;
763                let left = args[0].clone();
764                let middle = args[1].clone();
765                let middle_double = args[2].clone();
766                let right = args[3].clone();
767
768                let content = me.consume_args(1)?;
769                let mut content = content.into_iter().next().unwrap();
770
771                // Scan content and replace only the first | at depth 0
772                // Content tokens are in reversed order (stack), so iterate from end
773                let mut depth: i32 = 0;
774                let mut _first_pipe_idx: Option<usize> = None;
775                // Tokens are reversed (last token first in vec), scan in logical order
776                for i in (0..content.len()).rev() {
777                    let t = &content[i];
778                    if t.text == "{" { depth += 1; }
779                    else if t.text == "}" { depth -= 1; }
780                    else if depth == 0 && t.text == "|" {
781                        // Check for || (double pipe) → middleDouble
782                        if !middle_double.is_empty() && i > 0 && content[i - 1].text == "|" {
783                            _first_pipe_idx = Some(i);
784                            // Replace || with middleDouble
785                            content.remove(i);
786                            content.remove(i - 1);
787                            let insert_at = if i >= 2 { i - 1 } else { 0 };
788                            for (j, tok) in middle_double.iter().enumerate() {
789                                content.insert(insert_at + j, tok.clone());
790                            }
791                            break;
792                        }
793                        _first_pipe_idx = Some(i);
794                        content.remove(i);
795                        for (j, tok) in middle.iter().enumerate() {
796                            content.insert(i + j, tok.clone());
797                        }
798                        break;
799                    }
800                }
801
802                // Build: right + content + left (reversed for stack)
803                let mut to_expand = Vec::new();
804                to_expand.extend(right);
805                to_expand.extend(content);
806                to_expand.extend(left);
807
808                me.begin_group();
809                let expanded = me.expand_tokens(to_expand)?;
810                me.end_group();
811
812                Ok(expanded)
813            }),
814        );
815
816        // \\ce / \\pu: KaTeX mhchem 3.3.0 (Rust port in `crate::mhchem`)
817        self.macros.set(
818            "\\ce".to_string(),
819            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
820                let args = me.consume_args(1)?;
821                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
822                let tex = crate::mhchem::chem_parse_str(&s, "ce")
823                    .map_err(|e| ParseError::msg(format!("\\ce: {e}")))?;
824                Ok(lex_string_to_stack_tokens(&tex))
825            }),
826        );
827        self.macros.set(
828            "\\pu".to_string(),
829            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
830                let args = me.consume_args(1)?;
831                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
832                let tex = crate::mhchem::chem_parse_str(&s, "pu")
833                    .map_err(|e| ParseError::msg(format!("\\pu: {e}")))?;
834                Ok(lex_string_to_stack_tokens(&tex))
835            }),
836        );
837    }
838
839    pub fn set_macro(&mut self, name: String, def: MacroDefinition) {
840        self.macros.set(name, def);
841    }
842
843    pub fn set_macro_global(&mut self, name: String, def: MacroDefinition) {
844        self.macros.set_global(name, def);
845    }
846
847    pub fn set_text_macro(&mut self, name: &str, text: &str) {
848        self.macros.set(
849            name.to_string(),
850            MacroDefinition::Text(text.to_string()),
851        );
852    }
853
854    pub fn get_macro(&self, name: &str) -> Option<&MacroDefinition> {
855        self.macros.get(name)
856    }
857
858    /// Expand a list of tokens fully (for \edef/\xdef).
859    pub fn expand_tokens(&mut self, tokens: Vec<Token>) -> ParseResult<Vec<Token>> {
860        let saved_stack = std::mem::take(&mut self.stack);
861        self.stack = tokens;
862
863        let mut result = Vec::new();
864        loop {
865            if self.stack.is_empty() {
866                break;
867            }
868            let expanded = self.expand_once(false)?;
869            if !expanded {
870                if let Some(tok) = self.stack.pop() {
871                    if tok.is_eof() {
872                        break;
873                    }
874                    result.push(tok);
875                }
876            }
877        }
878
879        self.stack = saved_stack;
880        result.reverse();
881        Ok(result)
882    }
883
884    pub fn switch_mode(&mut self, new_mode: Mode) {
885        self.mode = new_mode;
886    }
887
888    pub fn begin_group(&mut self) {
889        self.macros.begin_group();
890    }
891
892    pub fn end_group(&mut self) {
893        self.macros.end_group();
894    }
895
896    pub fn end_groups(&mut self) {
897        self.macros.end_groups();
898    }
899
900    /// Returns the topmost token on the stack, without expanding it.
901    pub fn future(&mut self) -> &Token {
902        if self.stack.is_empty() {
903            let tok = self.lexer.lex();
904            self.stack.push(tok);
905        }
906        self.stack.last().unwrap()
907    }
908
909    /// Remove and return the next unexpanded token.
910    pub fn pop_token(&mut self) -> Token {
911        self.future();
912        self.stack.pop().unwrap()
913    }
914
915    /// Modify the top token's text on the stack (for \global prefix handling).
916    pub fn set_top_text(&mut self, text: String) {
917        self.future();
918        if let Some(tok) = self.stack.last_mut() {
919            tok.text = text;
920        }
921    }
922
923    /// Push a token onto the stack.
924    pub fn push_token(&mut self, token: Token) {
925        self.stack.push(token);
926    }
927
928    /// Push multiple tokens onto the stack.
929    pub fn push_tokens(&mut self, tokens: Vec<Token>) {
930        self.stack.extend(tokens);
931    }
932
933    /// Consume all following space tokens, without expansion.
934    pub fn consume_spaces(&mut self) {
935        loop {
936            let is_space = self.future().text == " ";
937            if is_space {
938                self.stack.pop();
939            } else {
940                break;
941            }
942        }
943    }
944
945    /// Expand the next token once if possible.
946    /// Returns Ok(true) if expanded, Ok(false) if not expandable.
947    fn expand_once(&mut self, expandable_only: bool) -> ParseResult<bool> {
948        let top_token = self.pop_token();
949        let name = &top_token.text;
950
951        if top_token.noexpand {
952            self.push_token(top_token);
953            return Ok(false);
954        }
955
956        // Check for function-based macro first — always expandable
957        if let Some(MacroDefinition::Function(handler)) = self.macros.get(name).cloned() {
958            self.count_expansion(1)?;
959            let tokens = handler(self)?;
960            self.stack.extend(tokens);
961            return Ok(true);
962        }
963
964        let expansion = self.get_expansion(name);
965        match expansion {
966            None => {
967                if expandable_only && name.starts_with('\\') && !self.is_defined(name) {
968                    return Err(ParseError::new(
969                        format!("Undefined control sequence: {}", name),
970                        Some(&top_token),
971                    ));
972                }
973                self.push_token(top_token);
974                Ok(false)
975            }
976            Some(exp) if expandable_only && exp.unexpandable => {
977                self.push_token(top_token);
978                Ok(false)
979            }
980            Some(exp) => {
981                self.count_expansion(1)?;
982                let mut tokens = exp.tokens;
983                if exp.num_args > 0 {
984                    let args = self.consume_args(exp.num_args)?;
985                    tokens = self.substitute_args(tokens, &args);
986                }
987                self.stack.extend(tokens);
988                Ok(true)
989            }
990        }
991    }
992
993    fn substitute_args(&self, mut tokens: Vec<Token>, args: &[Vec<Token>]) -> Vec<Token> {
994        let mut i = tokens.len();
995        while i > 0 {
996            i -= 1;
997            if tokens[i].text == "#" && i > 0 {
998                let next = &tokens[i - 1];
999                if next.text == "#" {
1000                    tokens.remove(i);
1001                    i -= 1;
1002                } else if let Ok(n) = next.text.parse::<usize>() {
1003                    if n >= 1 && n <= args.len() {
1004                        tokens.remove(i);
1005                        tokens.remove(i - 1);
1006                        let arg_tokens = &args[n - 1];
1007                        for (j, t) in arg_tokens.iter().enumerate() {
1008                            tokens.insert(i - 1 + j, t.clone());
1009                        }
1010                        i = i.saturating_sub(1);
1011                    }
1012                }
1013            }
1014        }
1015        tokens
1016    }
1017
1018    fn get_expansion(&self, name: &str) -> Option<MacroExpansion> {
1019        let def = self.macros.get(name)?;
1020
1021        if name.len() == 1 {
1022            let ch = name.chars().next().unwrap();
1023            let catcode = self.lexer_catcode(ch);
1024            if catcode != 0 && catcode != 13 {
1025                return None;
1026            }
1027        }
1028
1029        match def {
1030            MacroDefinition::Text(text) => {
1031                let mut num_args = 0;
1032                let stripped = text.replace("##", "");
1033                while stripped.contains(&format!("#{}", num_args + 1)) {
1034                    num_args += 1;
1035                }
1036                let mut body_lexer = Lexer::new(text);
1037                let mut tokens = Vec::new();
1038                loop {
1039                    let tok = body_lexer.lex();
1040                    if tok.is_eof() {
1041                        break;
1042                    }
1043                    tokens.push(tok);
1044                }
1045                tokens.reverse();
1046                Some(MacroExpansion {
1047                    tokens,
1048                    num_args,
1049                    unexpandable: false,
1050                })
1051            }
1052            MacroDefinition::Tokens { tokens, num_args } => Some(MacroExpansion {
1053                tokens: tokens.clone(),
1054                num_args: *num_args,
1055                unexpandable: false,
1056            }),
1057            MacroDefinition::Function(_) => {
1058                // Signal that this is a function macro; handled in expand_once
1059                Some(MacroExpansion {
1060                    tokens: vec![],
1061                    num_args: 0,
1062                    unexpandable: false,
1063                })
1064            }
1065        }
1066    }
1067
1068    fn lexer_catcode(&self, ch: char) -> u8 {
1069        self.lexer.get_catcode(ch)
1070    }
1071
1072    fn count_expansion(&mut self, amount: usize) -> ParseResult<()> {
1073        self.expansion_count += amount;
1074        if self.expansion_count > self.max_expand {
1075            Err(ParseError::msg(
1076                "Too many expansions: infinite loop or need to increase maxExpand setting",
1077            ))
1078        } else {
1079            Ok(())
1080        }
1081    }
1082
1083    /// Recursively expand the next token until a non-expandable token is found.
1084    pub fn expand_next_token(&mut self) -> ParseResult<Token> {
1085        loop {
1086            let expanded = self.expand_once(false)?;
1087            if !expanded {
1088                let mut token = self.stack.pop().unwrap();
1089                if token.treat_as_relax {
1090                    token.text = "\\relax".to_string();
1091                }
1092                return Ok(token);
1093            }
1094        }
1095    }
1096
1097    /// Consume a single argument from the token stream.
1098    pub fn consume_arg(&mut self, delims: Option<&[&str]>) -> ParseResult<ConsumedArg> {
1099        let is_delimited = delims.is_some_and(|d| !d.is_empty());
1100        if !is_delimited {
1101            self.consume_spaces();
1102        }
1103
1104        let start = self.future().clone();
1105        let mut tokens = Vec::new();
1106        let mut depth: i32 = 0;
1107        let mut end_tok;
1108
1109        loop {
1110            let tok = self.pop_token();
1111            end_tok = tok.clone();
1112            tokens.push(tok.clone());
1113
1114            if tok.text == "{" {
1115                depth += 1;
1116            } else if tok.text == "}" {
1117                depth -= 1;
1118                if depth == -1 {
1119                    return Err(ParseError::new("Extra }", Some(&tok)));
1120                }
1121            } else if tok.is_eof() {
1122                return Err(ParseError::new(
1123                    "Unexpected end of input in a macro argument",
1124                    Some(&tok),
1125                ));
1126            }
1127
1128            if depth == 0 && !is_delimited {
1129                break;
1130            }
1131
1132            if let Some(delims) = delims {
1133                if is_delimited && depth == 0 {
1134                    if let Some(last) = delims.last() {
1135                        if tok.text == *last {
1136                            tokens.pop();
1137                            break;
1138                        }
1139                    }
1140                }
1141            }
1142        }
1143
1144        if start.text == "{" && tokens.last().is_some_and(|t| t.text == "}") {
1145            tokens.pop();
1146            tokens.remove(0);
1147        }
1148
1149        tokens.reverse();
1150
1151        Ok(ConsumedArg {
1152            tokens,
1153            start,
1154            end: end_tok,
1155        })
1156    }
1157
1158    /// Consume N arguments.
1159    fn consume_args(&mut self, num_args: usize) -> ParseResult<Vec<Vec<Token>>> {
1160        let mut args = Vec::with_capacity(num_args);
1161        for _ in 0..num_args {
1162            let arg = self.consume_arg(None)?;
1163            args.push(arg.tokens);
1164        }
1165        Ok(args)
1166    }
1167
1168    /// Scan a function argument (optional or mandatory).
1169    /// Pushes an EOF token to mark the end, then pushes the argument tokens.
1170    pub fn scan_argument(&mut self, is_optional: bool) -> ParseResult<Option<Token>> {
1171        if is_optional {
1172            self.consume_spaces();
1173            if self.future().text != "[" {
1174                return Ok(None);
1175            }
1176            let start = self.pop_token();
1177            let arg = self.consume_arg(Some(&["]"]))?;
1178            let end = &arg.end;
1179            let end_loc = end.loc.clone();
1180
1181            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1182            self.push_tokens(arg.tokens);
1183
1184            let result = Token {
1185                text: String::new(),
1186                loc: SourceLocation::range(&start.loc, &end_loc),
1187                noexpand: false,
1188                treat_as_relax: false,
1189            };
1190            Ok(Some(result))
1191        } else {
1192            let arg = self.consume_arg(None)?;
1193            let end_loc = arg.end.loc.clone();
1194
1195            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1196            self.push_tokens(arg.tokens);
1197
1198            let result = Token {
1199                text: String::new(),
1200                loc: SourceLocation::range(&arg.start.loc, &end_loc),
1201                noexpand: false,
1202                treat_as_relax: false,
1203            };
1204            Ok(Some(result))
1205        }
1206    }
1207
1208    /// Check if a command name is currently defined.
1209    pub fn is_defined(&self, name: &str) -> bool {
1210        self.macros.has(name)
1211            || FUNCTIONS.contains_key(name)
1212            || is_known_symbol(name)
1213            || IMPLICIT_COMMANDS.contains(&name)
1214    }
1215
1216    /// Check if a command is expandable.
1217    pub fn is_expandable(&self, name: &str) -> bool {
1218        if let Some(_def) = self.macros.get(name) {
1219            return true;
1220        }
1221        if let Some(func) = FUNCTIONS.get(name) {
1222            return !func.primitive;
1223        }
1224        false
1225    }
1226}
1227
1228pub struct ConsumedArg {
1229    pub tokens: Vec<Token>,
1230    pub start: Token,
1231    pub end: Token,
1232}
1233
1234fn handle_newcommand(
1235    me: &mut MacroExpander,
1236    exists_ok: bool,
1237    nonexists_ok: bool,
1238) -> ParseResult<Vec<Token>> {
1239    let name_arg = me.consume_arg(None)?;
1240    // name_arg.tokens is reversed (stack order); last element = first token in original
1241    let name = name_arg.tokens.last().map_or_else(String::new, |t| t.text.clone());
1242
1243    let exists = me.is_defined(&name);
1244    if exists && !exists_ok {
1245        return Err(ParseError::msg(format!(
1246            "\\newcommand{{{}}} attempting to redefine {}; use \\renewcommand",
1247            name, name
1248        )));
1249    }
1250    if !exists && !nonexists_ok {
1251        return Err(ParseError::msg(format!(
1252            "\\renewcommand{{{}}} when command {} does not yet exist; use \\newcommand",
1253            name, name
1254        )));
1255    }
1256
1257    me.consume_spaces();
1258    let mut num_args = 0usize;
1259    if me.future().text == "[" {
1260        me.pop_token();
1261        let narg_tok = me.pop_token();
1262        num_args = narg_tok.text.parse().unwrap_or(0);
1263        let close = me.pop_token();
1264        if close.text != "]" {
1265            return Err(ParseError::msg("Expected ] in \\newcommand"));
1266        }
1267    }
1268
1269    let body_arg = me.consume_arg(None)?;
1270    let tokens = body_arg.tokens;
1271
1272    me.set_macro(name, MacroDefinition::Tokens { tokens, num_args });
1273    Ok(vec![])
1274}
1275
1276fn is_known_symbol(name: &str) -> bool {
1277    use ratex_font::symbols;
1278    symbols::get_symbol(name, symbols::Mode::Math).is_some()
1279        || symbols::get_symbol(name, symbols::Mode::Text).is_some()
1280}