Skip to main content

ratex_parser/
macro_expander.rs

1use std::collections::HashMap;
2
3use ratex_lexer::token::{SourceLocation, Token};
4use ratex_lexer::Lexer;
5
6use crate::error::{ParseError, ParseResult};
7use crate::functions::FUNCTIONS;
8use crate::parse_node::Mode;
9
/// Command names that behave like macros even though they are not registered as a
/// macro, function, or symbol. Consulted by `is_defined`.
pub static IMPLICIT_COMMANDS: &[&str] = &[
    "^",
    "_",
    "\\limits",
    "\\nolimits",
];
13
/// Handler type for function-based macros (e.g. `\TextOrMath`, `\@ifstar`).
///
/// The handler receives the [`MacroExpander`] mutably, so it can read further
/// tokens from it, and returns the tokens to push onto the expander's stack.
/// An `Err` from the handler is propagated as the result of the expansion.
pub type FnMacroHandler = fn(&mut MacroExpander) -> ParseResult<Vec<Token>>;
17
/// A macro definition: string template, token list, or function.
///
/// `Clone` is derived so definitions can be copied when a scope saves the
/// previous value of a name.
#[derive(Clone)]
pub enum MacroDefinition {
    /// Simple string expansion (e.g., `\def\foo{bar}` → "bar").
    /// The built-in table stores bodies such as `\smash{\phantom{#1}}` in this form.
    Text(String),
    /// Pre-tokenized expansion with argument count.
    Tokens {
        /// Tokens of the expansion body.
        // NOTE(review): presumably stored in stack (reversed) order like the other
        // token lists in this file; confirm against the expansion code.
        tokens: Vec<Token>,
        /// Number of arguments the macro takes.
        num_args: usize,
    },
    /// Function-based macro (consumes tokens directly, returns expansion).
    Function(FnMacroHandler),
}
31
32impl std::fmt::Debug for MacroDefinition {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            Self::Text(s) => write!(f, "Text({:?})", s),
36            Self::Tokens { tokens, num_args } => {
37                write!(f, "Tokens {{ {} tokens, {} args }}", tokens.len(), num_args)
38            }
39            Self::Function(_) => write!(f, "Function(...)"),
40        }
41    }
42}
43
/// Result of expanding a macro once.
struct MacroExpansion {
    /// Tokens produced by the expansion.
    tokens: Vec<Token>,
    /// Argument count associated with the expanded definition.
    num_args: usize,
    // NOTE(review): presumably marks an expansion whose result must not be
    // expanded again (cf. the `noexpand` token flag set by `\noexpand`); confirm
    // against the code that builds this struct.
    unexpandable: bool,
}
50
/// The MacroExpander (or "gullet") manages macro expansion.
///
/// It sits between the Lexer (mouth) and the Parser (stomach).
/// Tokens are read from the lexer, pushed onto an internal stack,
/// and macros are expanded until only non-expandable tokens remain.
///
/// Modeled after KaTeX's MacroExpander.ts.
pub struct MacroExpander<'a> {
    /// Lexer over the input string; source of fresh tokens.
    pub lexer: Lexer<'a>,
    /// Current parsing mode; `\TextOrMath` picks its branch based on this value.
    pub mode: Mode,
    /// Pending tokens pushed by expansions (token lists in this file are kept in
    /// reversed "stack order").
    stack: Vec<Token>,
    /// Scoped table of macro definitions, pre-filled with the built-ins by `new`.
    macros: MacroNamespace,
    // NOTE(review): presumably counts expansions performed so far and is checked
    // against `max_expand`; confirm in the expansion code.
    expansion_count: usize,
    /// Expansion limit; `new` initialises it to 1000.
    max_expand: usize,
}
66
67/// Scoped macro namespace supporting group nesting.
68struct MacroNamespace {
69    current: HashMap<String, MacroDefinition>,
70    group_stack: Vec<HashMap<String, Option<MacroDefinition>>>,
71}
72
73impl MacroNamespace {
74    fn new() -> Self {
75        Self {
76            current: HashMap::new(),
77            group_stack: Vec::new(),
78        }
79    }
80
81    fn get(&self, name: &str) -> Option<&MacroDefinition> {
82        self.current.get(name)
83    }
84
85    fn set(&mut self, name: String, def: MacroDefinition) {
86        if let Some(undo) = self.group_stack.last_mut() {
87            undo.entry(name.clone()).or_insert_with(|| self.current.get(&name).cloned());
88        }
89        self.current.insert(name, def);
90    }
91
92    fn set_global(&mut self, name: String, def: MacroDefinition) {
93        self.current.insert(name, def);
94    }
95
96    fn has(&self, name: &str) -> bool {
97        self.current.contains_key(name)
98    }
99
100    fn begin_group(&mut self) {
101        self.group_stack.push(HashMap::new());
102    }
103
104    fn end_group(&mut self) {
105        if let Some(undo) = self.group_stack.pop() {
106            for (name, old_val) in undo {
107                match old_val {
108                    Some(def) => { self.current.insert(name, def); }
109                    None => { self.current.remove(&name); }
110                }
111            }
112        }
113    }
114
115    fn end_groups(&mut self) {
116        while !self.group_stack.is_empty() {
117            self.end_group();
118        }
119    }
120}
121
122/// Tokenize a macro expansion string into stack order (same as [`MacroDefinition::Text`] bodies).
123fn lex_string_to_stack_tokens(text: &str) -> Vec<Token> {
124    let mut body_lexer = Lexer::new(text);
125    let mut tokens = Vec::new();
126    loop {
127        let tok = body_lexer.lex();
128        if tok.is_eof() {
129            break;
130        }
131        tokens.push(tok);
132    }
133    tokens.reverse();
134    tokens
135}
136
137impl<'a> MacroExpander<'a> {
138    pub fn new(input: &'a str, mode: Mode) -> Self {
139        let mut me = Self {
140            lexer: Lexer::new(input),
141            mode,
142            stack: Vec::new(),
143            macros: MacroNamespace::new(),
144            expansion_count: 0,
145            max_expand: 1000,
146        };
147        me.load_builtins();
148        me
149    }
150
151    fn load_builtins(&mut self) {
152        let builtins: &[(&str, &str)] = &[
153            // ── Grouping ──
154            ("\\bgroup", "{"),
155            ("\\egroup", "}"),
156
157            // ── Symbols from latex.ltx ──
158            ("\\lq", "`"),
159            ("\\rq", "'"),
160            // \lbrack and \rbrack are in the symbol table directly
161            ("\\aa", "\\r a"),
162            ("\\AA", "\\r A"),
163
164            // ── Active characters ──
165            ("~", "\\nobreakspace"),
166
167            // ── Phantoms ──
168            ("\\hphantom", "\\smash{\\phantom{#1}}"),
169
170            // ── Negated symbols ──
171            ("\\not", "\\html@mathml{\\mathrel{\\mathrlap\\@not}\\nobreak}{\\char\"338}"),
172            ("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}"),
173            ("\\ne", "\\neq"),
174            ("\u{2260}", "\\neq"),
175            ("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}{\\mathrel{\\char`∉}}"),
176            ("\u{2209}", "\\notin"),
177            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
178            ("\u{220C}", "\\notni"),
179            // \le and \ge are in the symbol table directly, not macros
180
181            // ── amsmath iff/implies ──
182            ("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"),
183            ("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"),
184            ("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"),
185
186            // ── Italic Greek capitals ──
187            ("\\varGamma", "\\mathit{\\Gamma}"),
188            ("\\varDelta", "\\mathit{\\Delta}"),
189            ("\\varTheta", "\\mathit{\\Theta}"),
190            ("\\varLambda", "\\mathit{\\Lambda}"),
191            ("\\varXi", "\\mathit{\\Xi}"),
192            ("\\varPi", "\\mathit{\\Pi}"),
193            ("\\varSigma", "\\mathit{\\Sigma}"),
194            ("\\varUpsilon", "\\mathit{\\Upsilon}"),
195            ("\\varPhi", "\\mathit{\\Phi}"),
196            ("\\varPsi", "\\mathit{\\Psi}"),
197            ("\\varOmega", "\\mathit{\\Omega}"),
198
199            // ── Spacing (mode-aware via \TextOrMath) ──
200            ("\\,", "\\TextOrMath{\\kern{.1667em}}{\\mskip{3mu}}"),
201            ("\\thinspace", "\\,"),
202            ("\\>", "\\mskip{4mu}"),
203            ("\\:", "\\TextOrMath{\\kern{.2222em}}{\\mskip{4mu}}"),
204            ("\\medspace", "\\:"),
205            ("\\;", "\\TextOrMath{\\kern{.2777em}}{\\mskip{5mu}}"),
206            ("\\thickspace", "\\;"),
207            ("\\!", "\\TextOrMath{\\kern{-.1667em}}{\\mskip{-3mu}}"),
208            ("\\negthinspace", "\\!"),
209            ("\\negmedspace", "\\TextOrMath{\\kern{-.2222em}}{\\mskip{-4mu}}"),
210            ("\\negthickspace", "\\TextOrMath{\\kern{-.2777em}}{\\mskip{-5mu}}"),
211            ("\\enspace", "\\kern.5em "),
212            ("\\enskip", "\\hskip.5em\\relax"),
213            ("\\quad", "\\hskip1em\\relax"),
214            ("\\qquad", "\\hskip2em\\relax"),
215
216            // ── Newline ──
217            ("\\newline", "\\\\\\relax"),
218
219            // ── hspace ──
220            ("\\@hspace", "\\hskip #1\\relax"),
221            ("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"),
222
223            // ── llap / rlap / clap ──
224            ("\\llap", "\\mathllap{\\textrm{#1}}"),
225            ("\\rlap", "\\mathrlap{\\textrm{#1}}"),
226            ("\\clap", "\\mathclap{\\textrm{#1}}"),
227
228            // ── Logos ──
229            ("\\TeX", "\\textrm{\\html@mathml{T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX}{TeX}}"),
230            ("\\LaTeX", "\\textrm{\\html@mathml{L\\kern-.36em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{LaTeX}}"),
231            ("\\KaTeX", "\\textrm{\\html@mathml{K\\kern-.17em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{KaTeX}}"),
232
233            // ── imath / jmath ──
234            ("\\imath", "\\html@mathml{\\@imath}{\u{0131}}"),
235            ("\\jmath", "\\html@mathml{\\@jmath}{\u{0237}}"),
236
237            // ── minuso ──
238            ("\\minuso", "\\mathbin{\\html@mathml{{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}{\\char`\u{29B5}}}"),
239            ("\\clap", "\\mathclap{\\textrm{#1}}"),
240
241            // ── mathstrut / underbar ──
242            ("\\mathstrut", "\\vphantom{(}"),
243            ("\\underbar", "\\underline{\\text{#1}}"),
244
245            // ── Bbbk ──
246            ("\\Bbbk", "\\Bbb{k}"),
247
248            // ── substack ──
249            ("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"),
250
251            // ── boxed ──
252            ("\\boxed", "\\fbox{$\\displaystyle{#1}$}"),
253
254            // ── colon ──
255            ("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"),
256
257            // ── dots (string-based) ──
258            ("\\dots", "\\cdots"),
259            ("\\cdots", "\\@cdots"),
260            ("\\dotsb", "\\cdots"),
261            ("\\dotsm", "\\cdots"),
262            ("\\dotsi", "\\!\\cdots"),
263            ("\\dotsx", "\\ldots\\,"),
264            ("\\dotsc", "\\ldots"),  // comma list: x,\dotsc,y
265            ("\\dotso", "\\ldots"),  // other
266            ("\\DOTSI", "\\relax"),
267            ("\\DOTSB", "\\relax"),
268            ("\\DOTSX", "\\relax"),
269
270            // ── negated relations / corners (→ symbol table \@xxx) ──
271            ("\\gvertneqq", "\\@gvertneqq"),
272            ("\\lvertneqq", "\\@lvertneqq"),
273            ("\\ngeqq", "\\@ngeqq"),
274            ("\\ngeqslant", "\\@ngeqslant"),
275            ("\\nleqq", "\\@nleqq"),
276            ("\\nleqslant", "\\@nleqslant"),
277            ("\\nshortmid", "\\@nshortmid"),
278            ("\\nshortparallel", "\\@nshortparallel"),
279            ("\\nsubseteqq", "\\@nsubseteqq"),
280            ("\\nsupseteqq", "\\@nsupseteqq"),
281            ("\\ulcorner", "\\@ulcorner"),
282            ("\\urcorner", "\\@urcorner"),
283            ("\\llcorner", "\\@llcorner"),
284            ("\\lrcorner", "\\@lrcorner"),
285            ("\\varsubsetneq", "\\@varsubsetneq"),
286            ("\\varsubsetneqq", "\\@varsubsetneqq"),
287            ("\\varsupsetneq", "\\@varsupsetneq"),
288            ("\\varsupsetneqq", "\\@varsupsetneqq"),
289
290            // ── delimiters / text (compose from existing) ──
291            // Match KaTeX `macros.ts` html@mathml first branch (STIX-style white tortoise brackets).
292            ("\\lBrace", "\\mathopen{\\{\\mkern-3.2mu[}"),
293            ("\\rBrace", "\\mathclose{]\\mkern-3.2mu\\}}"),
294            ("\\llbracket", "\\mathopen{[\\mkern-3.2mu[}"),
295            ("\\rrbracket", "\\mathclose{]\\mkern-3.2mu]}"),
296            ("\\copyright", "\\textcircled{c}"),
297            ("\\textregistered", "\\textcircled{\\scriptsize R}"),
298            ("\\textcopyright", "\\html@mathml{\\textcircled{c}}{\\char`©}"),
299
300            // ── tmspace (TeX primitive used by \, \: \; \!) ──
301            // KaTeX `\tmspace` takes 3 args: sign (+/-), mu-glue, em-kern.
302            ("\\tmspace", "\\TextOrMath{\\kern#1#3}{\\mskip#1#2}\\relax"),
303
304            // ── Unicode shorthand aliases ──
305            // Mirrors the `defineMacro("\u{...}", "...")` block in
306            // KaTeX `src/macros.ts`, so users can paste raw glyphs and
307            // get the same expansion as the named macro.
308            // Stacked relations (\u2258..\u225F).
309            ("\u{2258}", "\\mathrel{=\\kern{-1em}\\raisebox{0.4em}{$\\scriptsize\\frown$}}"),
310            ("\u{2259}", "\\stackrel{\\tiny\\wedge}{=}"),
311            ("\u{225A}", "\\stackrel{\\tiny\\vee}{=}"),
312            ("\u{225B}", "\\stackrel{\\scriptsize\\star}{=}"),
313            ("\u{225D}", "\\stackrel{\\tiny\\mathrm{def}}{=}"),
314            ("\u{225E}", "\\stackrel{\\tiny\\mathrm{m}}{=}"),
315            ("\u{225F}", "\\stackrel{\\tiny?}{=}"),
316            // Misc relations / corners / punctuation.
317            ("\u{27C2}", "\\perp"),
318            ("\u{203C}", "\\mathclose{!\\mkern-0.8mu!}"),
319            ("\u{231C}", "\\ulcorner"),
320            ("\u{231D}", "\\urcorner"),
321            ("\u{231E}", "\\llcorner"),
322            ("\u{231F}", "\\lrcorner"),
323            ("\u{00A9}", "\\copyright"),
324            ("\u{00AE}", "\\textregistered"),
325            // Mathtools colon variants (∷ ∹ ≔ ≕ ⩴).
326            ("\u{2237}", "\\dblcolon"),
327            ("\u{2239}", "\\eqcolon"),
328            ("\u{2254}", "\\coloneqq"),
329            ("\u{2255}", "\\eqqcolon"),
330            ("\u{2A74}", "\\Coloneqq"),
331            // stmaryrd brackets.
332            ("\u{27E6}", "\\llbracket"),
333            ("\u{27E7}", "\\rrbracket"),
334            ("\u{2983}", "\\lBrace"),
335            ("\u{2984}", "\\rBrace"),
336            // Plimsoll.
337            ("\u{29B5}", "\\minuso"),
338
339            // ── dddot / ddddot ──
340            ("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"),
341            ("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"),
342
343            // ── vdots ──
344            ("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}"),
345            ("\u{22ee}", "\\vdots"),
346
347            // ── bmod / pod / pmod / mod ──
348            ("\\bmod", "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}\\mathbin{\\rm mod}\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}"),
349            ("\\pod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)"),
350            ("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}"),
351            ("\\mod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}{\\rm mod}\\,\\,#1"),
352
353            // ── limsup / liminf / etc ──
354            ("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"),
355            ("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"),
356            ("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"),
357            ("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"),
358            ("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{\\mathrm{lim}}}"),
359            ("\\varliminf", "\\DOTSB\\operatorname*{\\underline{\\mathrm{lim}}}"),
360            ("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{\\mathrm{lim}}}"),
361            ("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{\\mathrm{lim}}}"),
362
363            // ── statmath ──
364            ("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"),
365            ("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"),
366            ("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"),
367
368            // ── mathtools colon variants ──
369            ("\\ordinarycolon", ":"),
370            ("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"),
371            ("\\dblcolon", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}{\\mathop{\\char\"2237}}"),
372            ("\\coloneqq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2254}}"),
373            ("\\Coloneqq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2237\\char\"3d}}"),
374            ("\\coloneq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"3a\\char\"2212}}"),
375            ("\\Coloneq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"2237\\char\"2212}}"),
376            ("\\eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2255}}"),
377            ("\\Eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"3d\\char\"2237}}"),
378            ("\\eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2239}}"),
379            ("\\Eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"2212\\char\"2237}}"),
380            ("\\colonapprox", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"3a\\char\"2248}}"),
381            ("\\Colonapprox", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"2237\\char\"2248}}"),
382            ("\\colonsim", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"3a\\char\"223c}}"),
383            ("\\Colonsim", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"2237\\char\"223c}}"),
384
385            // ── colonequals alternate names ──
386            ("\\ratio", "\\vcentcolon"),
387            ("\\coloncolon", "\\dblcolon"),
388            ("\\colonequals", "\\coloneqq"),
389            ("\\coloncolonequals", "\\Coloneqq"),
390            ("\\equalscolon", "\\eqqcolon"),
391            ("\\equalscoloncolon", "\\Eqqcolon"),
392            ("\\colonminus", "\\coloneq"),
393            ("\\coloncolonminus", "\\Coloneq"),
394            ("\\minuscolon", "\\eqcolon"),
395            ("\\minuscoloncolon", "\\Eqcolon"),
396            ("\\coloncolonapprox", "\\Colonapprox"),
397            ("\\coloncolonsim", "\\Colonsim"),
398            ("\\simcolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
399            ("\\simcoloncolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
400            ("\\approxcolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
401            ("\\approxcoloncolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
402
403            // ── braket (string-based) ──
404            ("\\bra", "\\mathinner{\\langle{#1}|}"),
405            ("\\ket", "\\mathinner{|{#1}\\rangle}"),
406            ("\\braket", "\\mathinner{\\langle{#1}\\rangle}"),
407            ("\\Braket", "\\bra@ket{\\left\\langle}{\\,\\middle\\vert\\,}{\\,\\middle\\vert\\,}{\\right\\rangle}"),
408            ("\\Bra", "\\left\\langle#1\\right|"),
409            ("\\Ket", "\\left|#1\\right\\rangle"),
410
411            // ── texvc (MediaWiki) ──
412            ("\\darr", "\\downarrow"),
413            ("\\dArr", "\\Downarrow"),
414            ("\\Darr", "\\Downarrow"),
415            ("\\lang", "\\langle"),
416            ("\\rang", "\\rangle"),
417            ("\\uarr", "\\uparrow"),
418            ("\\uArr", "\\Uparrow"),
419            ("\\Uarr", "\\Uparrow"),
420            ("\\N", "\\mathbb{N}"),
421            ("\\R", "\\mathbb{R}"),
422            ("\\Z", "\\mathbb{Z}"),
423            ("\\alef", "\\aleph"),
424            ("\\alefsym", "\\aleph"),
425            ("\\Alpha", "\\mathrm{A}"),
426            ("\\Beta", "\\mathrm{B}"),
427            ("\\bull", "\\bullet"),
428            ("\\Chi", "\\mathrm{X}"),
429            ("\\clubs", "\\clubsuit"),
430            ("\\cnums", "\\mathbb{C}"),
431            ("\\Complex", "\\mathbb{C}"),
432            ("\\Dagger", "\\ddagger"),
433            ("\\diamonds", "\\diamondsuit"),
434            ("\\empty", "\\emptyset"),
435            ("\\Epsilon", "\\mathrm{E}"),
436            ("\\Eta", "\\mathrm{H}"),
437            ("\\exist", "\\exists"),
438            ("\\harr", "\\leftrightarrow"),
439            ("\\hArr", "\\Leftrightarrow"),
440            ("\\Harr", "\\Leftrightarrow"),
441            ("\\hearts", "\\heartsuit"),
442            ("\\image", "\\Im"),
443            ("\\infin", "\\infty"),
444            ("\\Iota", "\\mathrm{I}"),
445            ("\\isin", "\\in"),
446            ("\\Kappa", "\\mathrm{K}"),
447            ("\\larr", "\\leftarrow"),
448            ("\\lArr", "\\Leftarrow"),
449            ("\\Larr", "\\Leftarrow"),
450            ("\\lrarr", "\\leftrightarrow"),
451            ("\\lrArr", "\\Leftrightarrow"),
452            ("\\Lrarr", "\\Leftrightarrow"),
453            ("\\Mu", "\\mathrm{M}"),
454            ("\\natnums", "\\mathbb{N}"),
455            ("\\Nu", "\\mathrm{N}"),
456            ("\\Omicron", "\\mathrm{O}"),
457            ("\\plusmn", "\\pm"),
458            ("\\rarr", "\\rightarrow"),
459            ("\\rArr", "\\Rightarrow"),
460            ("\\Rarr", "\\Rightarrow"),
461            ("\\real", "\\Re"),
462            ("\\reals", "\\mathbb{R}"),
463            ("\\Reals", "\\mathbb{R}"),
464            ("\\Rho", "\\mathrm{P}"),
465            ("\\sdot", "\\cdot"),
466            ("\\sect", "\\S"),
467            ("\\spades", "\\spadesuit"),
468            ("\\sub", "\\subset"),
469            ("\\sube", "\\subseteq"),
470            ("\\supe", "\\supseteq"),
471            ("\\Tau", "\\mathrm{T}"),
472            ("\\thetasym", "\\vartheta"),
473            ("\\weierp", "\\wp"),
474            ("\\Zeta", "\\mathrm{Z}"),
475
476            // ── Khan Academy color aliases ──
477            ("\\blue", "\\textcolor{##6495ed}{#1}"),
478            ("\\orange", "\\textcolor{##ffa500}{#1}"),
479            ("\\pink", "\\textcolor{##ff00af}{#1}"),
480            ("\\red", "\\textcolor{##df0030}{#1}"),
481            ("\\green", "\\textcolor{##28ae7b}{#1}"),
482            ("\\gray", "\\textcolor{gray}{#1}"),
483            ("\\purple", "\\textcolor{##9d38bd}{#1}"),
484
485            // ── Unicode script letters ──
486            ("\u{212C}", "\\mathscr{B}"),
487            ("\u{2130}", "\\mathscr{E}"),
488            ("\u{2131}", "\\mathscr{F}"),
489            ("\u{210B}", "\\mathscr{H}"),
490            ("\u{2110}", "\\mathscr{I}"),
491            ("\u{2112}", "\\mathscr{L}"),
492            ("\u{2133}", "\\mathscr{M}"),
493            ("\u{211B}", "\\mathscr{R}"),
494            ("\u{212D}", "\\mathfrak{C}"),
495            ("\u{210C}", "\\mathfrak{H}"),
496            ("\u{2128}", "\\mathfrak{Z}"),
497
498            // ── notni ──
499            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
500
501            // ── actuarialangle ──
502            ("\\angln", "{\\angl n}"),
503
504            // ── set/Set (braket notation, simplified) ──
505            ("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"),
506            ("\\Set", "\\bra@set{\\left\\{\\:}{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"),
507
508            // ── KaTeX mhchem (\\tripledash for \\bond ~ forms) ──
509            (
510                "\\tripledash",
511                "{\\vphantom{-}\\raisebox{2.56mu}{$\\mkern2mu\\tiny\\text{-}\\mkern1mu\\text{-}\\mkern1mu\\text{-}\\mkern2mu$}}",
512            ),
513        ];
514
515        for &(name, expansion) in builtins {
516            self.macros.set(
517                name.to_string(),
518                MacroDefinition::Text(expansion.to_string()),
519            );
520        }
521
522        self.load_function_macros();
523    }
524
525    fn load_function_macros(&mut self) {
526        // \noexpand: mark the next token as non-expandable (only if expandable)
527        self.macros.set(
528            "\\noexpand".to_string(),
529            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
530                let mut tok = me.pop_token();
531                if me.is_expandable(&tok.text) {
532                    tok.noexpand = true;
533                    tok.treat_as_relax = true;
534                }
535                Ok(vec![tok])
536            }),
537        );
538
539        // \@firstoftwo{A}{B} → A
540        // NOTE: consume_args returns tokens in stack order (reversed).
541        // We return them as-is since expand_once does stack.extend(tokens).
542        self.macros.set(
543            "\\@firstoftwo".to_string(),
544            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
545                let args = me.consume_args(2)?;
546                Ok(args.into_iter().next().unwrap())
547            }),
548        );
549
550        // \@secondoftwo{A}{B} → B
551        self.macros.set(
552            "\\@secondoftwo".to_string(),
553            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
554                let args = me.consume_args(2)?;
555                Ok(args.into_iter().nth(1).unwrap())
556            }),
557        );
558
559        // \@ifnextchar{C}{T}{F}: peek; if next non-space == C then T else F
560        self.macros.set(
561            "\\@ifnextchar".to_string(),
562            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
563                let args = me.consume_args(3)?;
564                me.consume_spaces();
565                let next = me.future().text.clone();
566                let char_arg = &args[0];
567                // char_arg is reversed; the "first" char in original order is the last element
568                let char_text = char_arg.first().map_or("", |t| t.text.as_str());
569                if next == char_text {
570                    Ok(args[1].clone())
571                } else {
572                    Ok(args[2].clone())
573                }
574            }),
575        );
576
577        // \@ifstar{with-star}{without-star}: if next is * → consume * and use first arg
578        self.macros.set(
579            "\\@ifstar".to_string(),
580            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
581                let args = me.consume_args(2)?;
582                let next = me.future().text.clone();
583                if next == "*" {
584                    me.pop_token();
585                    Ok(args[0].clone())
586                } else {
587                    Ok(args[1].clone())
588                }
589            }),
590        );
591
592        // \TextOrMath{text-branch}{math-branch}: choose based on mode
593        self.macros.set(
594            "\\TextOrMath".to_string(),
595            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
596                let args = me.consume_args(2)?;
597                if me.mode == Mode::Text {
598                    Ok(args[0].clone())
599                } else {
600                    Ok(args[1].clone())
601                }
602            }),
603        );
604
605        // \html@mathml is registered as a function in htmlmathml.rs
606
607        // \newcommand{\name}[nargs]{body}
608        self.macros.set(
609            "\\newcommand".to_string(),
610            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
611                handle_newcommand(me, false, true)
612            }),
613        );
614
615        // \renewcommand{\name}[nargs]{body}
616        self.macros.set(
617            "\\renewcommand".to_string(),
618            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
619                handle_newcommand(me, true, false)
620            }),
621        );
622
623        // \providecommand{\name}[nargs]{body}
624        self.macros.set(
625            "\\providecommand".to_string(),
626            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
627                handle_newcommand(me, true, true)
628            }),
629        );
630
631        // \char: parse decimal/octal/hex/backtick number → \@char{N}
632        self.macros.set(
633            "\\char".to_string(),
634            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
635                let mut tok = me.pop_token();
636                let mut number: i64;
637                let base: Option<u32>;
638
639                if tok.text == "'" {
640                    base = Some(8);
641                    tok = me.pop_token();
642                } else if tok.text == "\"" {
643                    base = Some(16);
644                    tok = me.pop_token();
645                } else if tok.text == "`" {
646                    tok = me.pop_token();
647                    if tok.text.starts_with('\\') {
648                        number = tok.text.chars().nth(1).map_or(0, |c| c as i64);
649                    } else {
650                        number = tok.text.chars().next().map_or(0, |c| c as i64);
651                    }
652                    // Build \@char{N} tokens in reverse (stack order)
653                    let s = number.to_string();
654                    let loc = tok.loc.clone();
655                    let mut result = vec![Token::new("}", loc.start, loc.end)];
656                    for ch in s.chars().rev() {
657                        result.push(Token::new(ch.to_string(), loc.start, loc.end));
658                    }
659                    result.push(Token::new("{", loc.start, loc.end));
660                    result.push(Token::new("\\@char", loc.start, loc.end));
661                    return Ok(result);
662                } else {
663                    base = Some(10);
664                }
665
666                if let Some(b) = base {
667                    number = i64::from_str_radix(&tok.text, b).unwrap_or(0);
668                    loop {
669                        let next = me.future().text.clone();
670                        if let Ok(d) = i64::from_str_radix(&next, b) {
671                            me.pop_token();
672                            number = number * (b as i64) + d;
673                        } else {
674                            break;
675                        }
676                    }
677                } else {
678                    number = 0;
679                }
680
681                let s = number.to_string();
682                let loc = tok.loc.clone();
683                let mut result = vec![Token::new("}", loc.start, loc.end)];
684                for ch in s.chars().rev() {
685                    result.push(Token::new(ch.to_string(), loc.start, loc.end));
686                }
687                result.push(Token::new("{", loc.start, loc.end));
688                result.push(Token::new("\\@char", loc.start, loc.end));
689                Ok(result)
690            }),
691        );
692
693        // \operatorname: \@ifstar\operatornamewithlimits\operatorname@
694        self.macros.set(
695            "\\operatorname".to_string(),
696            MacroDefinition::Text(
697                "\\@ifstar\\operatornamewithlimits\\operatorname@".to_string(),
698            ),
699        );
700
701        // \message{...}: consume argument and discard (no-op)
702        self.macros.set(
703            "\\message".to_string(),
704            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
705                let _args = me.consume_args(1)?;
706                Ok(vec![])
707            }),
708        );
709
710        // \errmessage{...}: consume argument and discard (no-op)
711        self.macros.set(
712            "\\errmessage".to_string(),
713            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
714                let _args = me.consume_args(1)?;
715                Ok(vec![])
716            }),
717        );
718
719        // KaTeX HTML extensions: no-op (only render content, no HTML attributes).
720        // Not standard LaTeX; for compatibility we parse and expand to second argument only.
721        // \htmlStyle is registered as a real function so the renderer can honor basic CSS.
722        for name in &["\\htmlClass", "\\htmlData", "\\htmlId"] {
723            let name = (*name).to_string();
724            self.macros.set(
725                name.clone(),
726                MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
727                    let args = me.consume_args(2)?;
728                    let content = args[1].iter().cloned().rev().collect::<Vec<_>>();
729                    Ok(content)
730                }),
731            );
732        }
733
734        // \bra@ket: like \bra@set but replaces ALL | at depth 0 (for \Braket)
735        self.macros.set(
736            "\\bra@ket".to_string(),
737            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
738                let args = me.consume_args(4)?;
739                let left = args[0].clone();
740                let middle = args[1].clone();
741                let middle_double = args[2].clone();
742                let right = args[3].clone();
743
744                let content = me.consume_args(1)?;
745                let content = content.into_iter().next().unwrap();
746
747                // Convert stack-order (reversed) to logical order, replace all | at depth 0,
748                // then reverse back to stack order.
749                let logical: Vec<Token> = content.into_iter().rev().collect();
750                let mut new_logical: Vec<Token> = Vec::new();
751                let mut depth: i32 = 0;
752                let mut i = 0;
753                while i < logical.len() {
754                    let t = &logical[i];
755                    if t.text == "{" {
756                        depth += 1;
757                        new_logical.push(t.clone());
758                    } else if t.text == "}" {
759                        depth -= 1;
760                        new_logical.push(t.clone());
761                    } else if depth == 0 && t.text == "|" {
762                        // Check for || (double pipe) → middleDouble
763                        if !middle_double.is_empty()
764                            && i + 1 < logical.len()
765                            && logical[i + 1].text == "|"
766                        {
767                            // middle_double is in stack/reversed order; reverse to logical order
768                            new_logical.extend(middle_double.iter().rev().cloned());
769                            i += 2;
770                            continue;
771                        }
772                        // middle is in stack/reversed order; reverse to logical order
773                        new_logical.extend(middle.iter().rev().cloned());
774                    } else {
775                        new_logical.push(t.clone());
776                    }
777                    i += 1;
778                }
779
780                // Reverse back to stack order
781                let content_rev: Vec<Token> = new_logical.into_iter().rev().collect();
782
783                // Build: right + content + left (reversed for stack)
784                let mut to_expand = Vec::new();
785                to_expand.extend(right);
786                to_expand.extend(content_rev);
787                to_expand.extend(left);
788
789                me.begin_group();
790                let expanded = me.expand_tokens(to_expand)?;
791                me.end_group();
792
793                Ok(expanded)
794            }),
795        );
796
797        // \bra@set: braket set notation helper
798        // Only replaces the FIRST | with middle tokens (one-shot), matching KaTeX
799        self.macros.set(
800            "\\bra@set".to_string(),
801            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
802                let args = me.consume_args(4)?;
803                let left = args[0].clone();
804                let middle = args[1].clone();
805                let middle_double = args[2].clone();
806                let right = args[3].clone();
807
808                let content = me.consume_args(1)?;
809                let mut content = content.into_iter().next().unwrap();
810
811                // Scan content and replace only the first | at depth 0
812                // Content tokens are in reversed order (stack), so iterate from end
813                let mut depth: i32 = 0;
814                let mut _first_pipe_idx: Option<usize> = None;
815                // Tokens are reversed (last token first in vec), scan in logical order
816                for i in (0..content.len()).rev() {
817                    let t = &content[i];
818                    if t.text == "{" { depth += 1; }
819                    else if t.text == "}" { depth -= 1; }
820                    else if depth == 0 && t.text == "|" {
821                        // Check for || (double pipe) → middleDouble
822                        if !middle_double.is_empty() && i > 0 && content[i - 1].text == "|" {
823                            _first_pipe_idx = Some(i);
824                            // Replace || with middleDouble
825                            content.remove(i);
826                            content.remove(i - 1);
827                            let insert_at = if i >= 2 { i - 1 } else { 0 };
828                            for (j, tok) in middle_double.iter().enumerate() {
829                                content.insert(insert_at + j, tok.clone());
830                            }
831                            break;
832                        }
833                        _first_pipe_idx = Some(i);
834                        content.remove(i);
835                        for (j, tok) in middle.iter().enumerate() {
836                            content.insert(i + j, tok.clone());
837                        }
838                        break;
839                    }
840                }
841
842                // Build: right + content + left (reversed for stack)
843                let mut to_expand = Vec::new();
844                to_expand.extend(right);
845                to_expand.extend(content);
846                to_expand.extend(left);
847
848                me.begin_group();
849                let expanded = me.expand_tokens(to_expand)?;
850                me.end_group();
851
852                Ok(expanded)
853            }),
854        );
855
856        // \\ce / \\pu: KaTeX mhchem 3.3.0 (Rust port in `crate::mhchem`)
857        self.macros.set(
858            "\\ce".to_string(),
859            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
860                let args = me.consume_args(1)?;
861                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
862                let tex = crate::mhchem::chem_parse_str(&s, "ce")
863                    .map_err(|e| ParseError::msg(format!("\\ce: {e}")))?;
864                Ok(lex_string_to_stack_tokens(&tex))
865            }),
866        );
867        self.macros.set(
868            "\\pu".to_string(),
869            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
870                let args = me.consume_args(1)?;
871                let s = crate::mhchem::mhchem_arg_tokens_to_string(&args[0]);
872                let tex = crate::mhchem::chem_parse_str(&s, "pu")
873                    .map_err(|e| ParseError::msg(format!("\\pu: {e}")))?;
874                Ok(lex_string_to_stack_tokens(&tex))
875            }),
876        );
877    }
878
    /// Register `def` as the definition of the macro `name`.
    ///
    /// Delegates to `self.macros.set`.
    /// NOTE(review): presumably the definition is scoped to the current group, in
    /// contrast to `set_macro_global` — confirm against the macro namespace type.
    pub fn set_macro(&mut self, name: String, def: MacroDefinition) {
        self.macros.set(name, def);
    }
882
    /// Register `def` as the definition of the macro `name` via `self.macros.set_global`.
    ///
    /// NOTE(review): presumably the definition survives the end of the current group
    /// (as with TeX's `\global`) — confirm against the macro namespace type.
    pub fn set_macro_global(&mut self, name: String, def: MacroDefinition) {
        self.macros.set_global(name, def);
    }
886
887    pub fn set_text_macro(&mut self, name: &str, text: &str) {
888        self.macros.set(
889            name.to_string(),
890            MacroDefinition::Text(text.to_string()),
891        );
892    }
893
    /// Look up the definition currently registered for `name`.
    ///
    /// Returns `None` when `self.macros` has no entry for `name`.
    pub fn get_macro(&self, name: &str) -> Option<&MacroDefinition> {
        self.macros.get(name)
    }
897
898    /// Expand a list of tokens fully (for \edef/\xdef).
899    pub fn expand_tokens(&mut self, tokens: Vec<Token>) -> ParseResult<Vec<Token>> {
900        let saved_stack = std::mem::take(&mut self.stack);
901        self.stack = tokens;
902
903        let mut result = Vec::new();
904        loop {
905            if self.stack.is_empty() {
906                break;
907            }
908            let expanded = self.expand_once(false)?;
909            if !expanded {
910                if let Some(tok) = self.stack.pop() {
911                    if tok.is_eof() {
912                        break;
913                    }
914                    result.push(tok);
915                }
916            }
917        }
918
919        self.stack = saved_stack;
920        result.reverse();
921        Ok(result)
922    }
923
    /// Set the expander's current mode to `new_mode`.
    pub fn switch_mode(&mut self, new_mode: Mode) {
        self.mode = new_mode;
    }
927
    /// Start a new group by delegating to `self.macros.begin_group()`.
    ///
    /// NOTE(review): presumably macro definitions made after this call are undone by
    /// the matching `end_group` — confirm against the macro namespace type.
    pub fn begin_group(&mut self) {
        self.macros.begin_group();
    }
931
    /// End the current group by delegating to `self.macros.end_group()`.
    ///
    /// NOTE(review): presumably this reverts the definitions made since the matching
    /// `begin_group` — confirm against the macro namespace type.
    pub fn end_group(&mut self) {
        self.macros.end_group();
    }
935
    /// Delegate to `self.macros.end_groups()`.
    ///
    /// NOTE(review): presumably this closes every group that is still open — confirm
    /// against the macro namespace type.
    pub fn end_groups(&mut self) {
        self.macros.end_groups();
    }
939
940    /// Returns the topmost token on the stack, without expanding it.
941    pub fn future(&mut self) -> &Token {
942        if self.stack.is_empty() {
943            let tok = self.lexer.lex();
944            self.stack.push(tok);
945        }
946        self.stack.last().unwrap()
947    }
948
949    /// Remove and return the next unexpanded token.
950    pub fn pop_token(&mut self) -> Token {
951        self.future();
952        self.stack.pop().unwrap()
953    }
954
955    /// Modify the top token's text on the stack (for \global prefix handling).
956    pub fn set_top_text(&mut self, text: String) {
957        self.future();
958        if let Some(tok) = self.stack.last_mut() {
959            tok.text = text;
960        }
961    }
962
    /// Push a token onto the stack.
    ///
    /// The pushed token becomes the top of the stack, i.e. the token that `future`
    /// and `pop_token` will see next.
    pub fn push_token(&mut self, token: Token) {
        self.stack.push(token);
    }
967
968    /// Push multiple tokens onto the stack.
969    pub fn push_tokens(&mut self, tokens: Vec<Token>) {
970        self.stack.extend(tokens);
971    }
972
973    /// Consume all following space tokens, without expansion.
974    pub fn consume_spaces(&mut self) {
975        loop {
976            let is_space = self.future().text == " ";
977            if is_space {
978                self.stack.pop();
979            } else {
980                break;
981            }
982        }
983    }
984
985    /// Expand the next token once if possible.
986    /// Returns Ok(true) if expanded, Ok(false) if not expandable.
987    fn expand_once(&mut self, expandable_only: bool) -> ParseResult<bool> {
988        let top_token = self.pop_token();
989        let name = &top_token.text;
990
991        if top_token.noexpand {
992            self.push_token(top_token);
993            return Ok(false);
994        }
995
996        // Check for function-based macro first — always expandable
997        if let Some(MacroDefinition::Function(handler)) = self.macros.get(name).cloned() {
998            self.count_expansion(1)?;
999            let tokens = handler(self)?;
1000            self.stack.extend(tokens);
1001            return Ok(true);
1002        }
1003
1004        let expansion = self.get_expansion(name);
1005        match expansion {
1006            None => {
1007                if expandable_only && name.starts_with('\\') && !self.is_defined(name) {
1008                    return Err(ParseError::new(
1009                        format!("Undefined control sequence: {}", name),
1010                        Some(&top_token),
1011                    ));
1012                }
1013                self.push_token(top_token);
1014                Ok(false)
1015            }
1016            Some(exp) if expandable_only && exp.unexpandable => {
1017                self.push_token(top_token);
1018                Ok(false)
1019            }
1020            Some(exp) => {
1021                self.count_expansion(1)?;
1022                let mut tokens = exp.tokens;
1023                if exp.num_args > 0 {
1024                    let args = self.consume_args(exp.num_args)?;
1025                    tokens = self.substitute_args(tokens, &args);
1026                }
1027                self.stack.extend(tokens);
1028                Ok(true)
1029            }
1030        }
1031    }
1032
1033    fn substitute_args(&self, mut tokens: Vec<Token>, args: &[Vec<Token>]) -> Vec<Token> {
1034        let mut i = tokens.len();
1035        while i > 0 {
1036            i -= 1;
1037            if tokens[i].text == "#" && i > 0 {
1038                let next = &tokens[i - 1];
1039                if next.text == "#" {
1040                    tokens.remove(i);
1041                    i -= 1;
1042                } else if let Ok(n) = next.text.parse::<usize>() {
1043                    if n >= 1 && n <= args.len() {
1044                        tokens.remove(i);
1045                        tokens.remove(i - 1);
1046                        let arg_tokens = &args[n - 1];
1047                        for (j, t) in arg_tokens.iter().enumerate() {
1048                            tokens.insert(i - 1 + j, t.clone());
1049                        }
1050                        i = i.saturating_sub(1);
1051                    }
1052                }
1053            }
1054        }
1055        tokens
1056    }
1057
1058    fn get_expansion(&self, name: &str) -> Option<MacroExpansion> {
1059        let def = self.macros.get(name)?;
1060
1061        if name.len() == 1 {
1062            let ch = name.chars().next().unwrap();
1063            let catcode = self.lexer_catcode(ch);
1064            if catcode != 0 && catcode != 13 {
1065                return None;
1066            }
1067        }
1068
1069        match def {
1070            MacroDefinition::Text(text) => {
1071                let mut num_args = 0;
1072                let stripped = text.replace("##", "");
1073                while stripped.contains(&format!("#{}", num_args + 1)) {
1074                    num_args += 1;
1075                }
1076                let mut body_lexer = Lexer::new(text);
1077                let mut tokens = Vec::new();
1078                loop {
1079                    let tok = body_lexer.lex();
1080                    if tok.is_eof() {
1081                        break;
1082                    }
1083                    tokens.push(tok);
1084                }
1085                tokens.reverse();
1086                Some(MacroExpansion {
1087                    tokens,
1088                    num_args,
1089                    unexpandable: false,
1090                })
1091            }
1092            MacroDefinition::Tokens { tokens, num_args } => Some(MacroExpansion {
1093                tokens: tokens.clone(),
1094                num_args: *num_args,
1095                unexpandable: false,
1096            }),
1097            MacroDefinition::Function(_) => {
1098                // Signal that this is a function macro; handled in expand_once
1099                Some(MacroExpansion {
1100                    tokens: vec![],
1101                    num_args: 0,
1102                    unexpandable: false,
1103                })
1104            }
1105        }
1106    }
1107
    /// Return the category code that the lexer reports for `ch`.
    ///
    /// Thin wrapper around `self.lexer.get_catcode`.
    fn lexer_catcode(&self, ch: char) -> u8 {
        self.lexer.get_catcode(ch)
    }
1111
1112    fn count_expansion(&mut self, amount: usize) -> ParseResult<()> {
1113        self.expansion_count += amount;
1114        if self.expansion_count > self.max_expand {
1115            Err(ParseError::msg(
1116                "Too many expansions: infinite loop or need to increase maxExpand setting",
1117            ))
1118        } else {
1119            Ok(())
1120        }
1121    }
1122
1123    /// Recursively expand the next token until a non-expandable token is found.
1124    pub fn expand_next_token(&mut self) -> ParseResult<Token> {
1125        loop {
1126            let expanded = self.expand_once(false)?;
1127            if !expanded {
1128                let mut token = self.stack.pop().unwrap();
1129                if token.treat_as_relax {
1130                    token.text = "\\relax".to_string();
1131                }
1132                return Ok(token);
1133            }
1134        }
1135    }
1136
1137    /// Consume a single argument from the token stream.
1138    pub fn consume_arg(&mut self, delims: Option<&[&str]>) -> ParseResult<ConsumedArg> {
1139        let is_delimited = delims.is_some_and(|d| !d.is_empty());
1140        if !is_delimited {
1141            self.consume_spaces();
1142        }
1143
1144        let start = self.future().clone();
1145        let mut tokens = Vec::new();
1146        let mut depth: i32 = 0;
1147        let mut end_tok;
1148
1149        loop {
1150            let tok = self.pop_token();
1151            end_tok = tok.clone();
1152            tokens.push(tok.clone());
1153
1154            if tok.text == "{" {
1155                depth += 1;
1156            } else if tok.text == "}" {
1157                depth -= 1;
1158                if depth == -1 {
1159                    return Err(ParseError::new("Extra }", Some(&tok)));
1160                }
1161            } else if tok.is_eof() {
1162                return Err(ParseError::new(
1163                    "Unexpected end of input in a macro argument",
1164                    Some(&tok),
1165                ));
1166            }
1167
1168            if depth == 0 && !is_delimited {
1169                break;
1170            }
1171
1172            if let Some(delims) = delims {
1173                if is_delimited && depth == 0 {
1174                    if let Some(last) = delims.last() {
1175                        if tok.text == *last {
1176                            tokens.pop();
1177                            break;
1178                        }
1179                    }
1180                }
1181            }
1182        }
1183
1184        if start.text == "{" && tokens.last().is_some_and(|t| t.text == "}") {
1185            tokens.pop();
1186            tokens.remove(0);
1187        }
1188
1189        tokens.reverse();
1190
1191        Ok(ConsumedArg {
1192            tokens,
1193            start,
1194            end: end_tok,
1195        })
1196    }
1197
1198    /// Consume N arguments.
1199    fn consume_args(&mut self, num_args: usize) -> ParseResult<Vec<Vec<Token>>> {
1200        let mut args = Vec::with_capacity(num_args);
1201        for _ in 0..num_args {
1202            let arg = self.consume_arg(None)?;
1203            args.push(arg.tokens);
1204        }
1205        Ok(args)
1206    }
1207
1208    /// Scan a function argument (optional or mandatory).
1209    /// Pushes an EOF token to mark the end, then pushes the argument tokens.
1210    pub fn scan_argument(&mut self, is_optional: bool) -> ParseResult<Option<Token>> {
1211        if is_optional {
1212            self.consume_spaces();
1213            if self.future().text != "[" {
1214                return Ok(None);
1215            }
1216            let start = self.pop_token();
1217            let arg = self.consume_arg(Some(&["]"]))?;
1218            let end = &arg.end;
1219            let end_loc = end.loc.clone();
1220
1221            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1222            self.push_tokens(arg.tokens);
1223
1224            let result = Token {
1225                text: String::new(),
1226                loc: SourceLocation::range(&start.loc, &end_loc),
1227                noexpand: false,
1228                treat_as_relax: false,
1229            };
1230            Ok(Some(result))
1231        } else {
1232            let arg = self.consume_arg(None)?;
1233            let end_loc = arg.end.loc.clone();
1234
1235            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1236            self.push_tokens(arg.tokens);
1237
1238            let result = Token {
1239                text: String::new(),
1240                loc: SourceLocation::range(&arg.start.loc, &end_loc),
1241                noexpand: false,
1242                treat_as_relax: false,
1243            };
1244            Ok(Some(result))
1245        }
1246    }
1247
1248    /// Check if a command name is currently defined.
1249    pub fn is_defined(&self, name: &str) -> bool {
1250        self.macros.has(name)
1251            || FUNCTIONS.contains_key(name)
1252            || is_known_symbol(name)
1253            || IMPLICIT_COMMANDS.contains(&name)
1254    }
1255
1256    /// Check if a command is expandable.
1257    pub fn is_expandable(&self, name: &str) -> bool {
1258        if let Some(_def) = self.macros.get(name) {
1259            return true;
1260        }
1261        if let Some(func) = FUNCTIONS.get(name) {
1262            return !func.primitive;
1263        }
1264        false
1265    }
1266}
1267
/// A macro argument as returned by `MacroExpander::consume_arg`.
pub struct ConsumedArg {
    /// The argument's tokens in stack order (reversed: the first token of the
    /// argument is the last element).
    pub tokens: Vec<Token>,
    /// The token that was next in the stream when reading of the argument began.
    pub start: Token,
    /// The last token that was read while consuming the argument.
    pub end: Token,
}
1273
1274fn handle_newcommand(
1275    me: &mut MacroExpander,
1276    exists_ok: bool,
1277    nonexists_ok: bool,
1278) -> ParseResult<Vec<Token>> {
1279    let name_arg = me.consume_arg(None)?;
1280    // name_arg.tokens is reversed (stack order); last element = first token in original
1281    let name = name_arg.tokens.last().map_or_else(String::new, |t| t.text.clone());
1282
1283    let exists = me.is_defined(&name);
1284    if exists && !exists_ok {
1285        return Err(ParseError::msg(format!(
1286            "\\newcommand{{{}}} attempting to redefine {}; use \\renewcommand",
1287            name, name
1288        )));
1289    }
1290    if !exists && !nonexists_ok {
1291        return Err(ParseError::msg(format!(
1292            "\\renewcommand{{{}}} when command {} does not yet exist; use \\newcommand",
1293            name, name
1294        )));
1295    }
1296
1297    me.consume_spaces();
1298    let mut num_args = 0usize;
1299    if me.future().text == "[" {
1300        me.pop_token();
1301        let narg_tok = me.pop_token();
1302        num_args = narg_tok.text.parse().unwrap_or(0);
1303        let close = me.pop_token();
1304        if close.text != "]" {
1305            return Err(ParseError::msg("Expected ] in \\newcommand"));
1306        }
1307    }
1308
1309    let body_arg = me.consume_arg(None)?;
1310    let tokens = body_arg.tokens;
1311
1312    me.set_macro(name, MacroDefinition::Tokens { tokens, num_args });
1313    Ok(vec![])
1314}
1315
1316fn is_known_symbol(name: &str) -> bool {
1317    use ratex_font::symbols;
1318    symbols::get_symbol(name, symbols::Mode::Math).is_some()
1319        || symbols::get_symbol(name, symbols::Mode::Text).is_some()
1320}