Skip to main content

ratex_parser/
macro_expander.rs

1use std::collections::HashMap;
2
3use ratex_lexer::token::{SourceLocation, Token};
4use ratex_lexer::Lexer;
5
6use crate::error::{ParseError, ParseResult};
7use crate::functions::FUNCTIONS;
8use crate::parse_node::Mode;
9
/// Commands that act like macros but aren't defined as a macro, function, or symbol.
///
/// The list holds the `^` and `_` characters and the `\limits` / `\nolimits`
/// control sequences. Used in `is_defined`.
pub static IMPLICIT_COMMANDS: &[&str] = &["^", "_", "\\limits", "\\nolimits"];
13
/// Handler type for function-based macros (e.g. \TextOrMath, \@ifstar).
///
/// Takes the MacroExpander mutably, so a handler can pop or peek further
/// tokens itself, and returns tokens to push onto the stack. The handlers
/// registered in `load_function_macros` return those tokens in stack
/// (reversed) order.
pub type FnMacroHandler = fn(&mut MacroExpander) -> ParseResult<Vec<Token>>;
17
/// A macro definition: string template, token list, or function.
///
/// `Clone` is derived because the scoped namespace copies the previous
/// binding of a name when it records an undo entry for a group.
#[derive(Clone)]
pub enum MacroDefinition {
    /// Simple string expansion (e.g., `\def\foo{bar}` → "bar").
    /// The built-in table also stores templates that contain `#1`
    /// placeholders in this form (e.g. `\hphantom`).
    Text(String),
    /// Pre-tokenized expansion with argument count
    Tokens {
        /// The pre-tokenized expansion.
        tokens: Vec<Token>,
        /// The argument count that goes with `tokens`.
        num_args: usize,
    },
    /// Function-based macro (consumes tokens directly, returns expansion).
    /// The payload is a plain `fn` pointer, see `FnMacroHandler`.
    Function(FnMacroHandler),
}
31
32impl std::fmt::Debug for MacroDefinition {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            Self::Text(s) => write!(f, "Text({:?})", s),
36            Self::Tokens { tokens, num_args } => {
37                write!(f, "Tokens {{ {} tokens, {} args }}", tokens.len(), num_args)
38            }
39            Self::Function(_) => write!(f, "Function(...)"),
40        }
41    }
42}
43
/// Result of expanding a macro once.
///
/// NOTE(review): the code that builds and consumes this struct is outside the
/// visible part of the file; the field notes below are inferred from the
/// field names only and should be confirmed.
struct MacroExpansion {
    // presumably the tokens the macro expands to — TODO confirm
    tokens: Vec<Token>,
    // presumably the number of arguments the macro takes — TODO confirm
    num_args: usize,
    // presumably marks a result that must not be expanded further — TODO confirm
    unexpandable: bool,
}
50
/// The MacroExpander (or "gullet") manages macro expansion.
///
/// It sits between the Lexer (mouth) and the Parser (stomach).
/// Tokens are read from the lexer, pushed onto an internal stack,
/// and macros are expanded until only non-expandable tokens remain.
///
/// Modeled after KaTeX's MacroExpander.ts.
pub struct MacroExpander<'a> {
    /// Token source (the "mouth"), reading from the borrowed input string.
    pub lexer: Lexer<'a>,
    /// Current mode; `\TextOrMath` picks its branch by comparing this
    /// against `Mode::Text`.
    pub mode: Mode,
    // Pending tokens. `expand_tokens` takes the next token with `pop()`, so
    // the end of the vector is the front of the input ("stack order").
    stack: Vec<Token>,
    // Macro definitions with group scoping.
    macros: MacroNamespace,
    // Starts at 0 in `new`. Presumably counts expansions against
    // `max_expand`; the code that updates it is not visible here — TODO confirm.
    expansion_count: usize,
    // Set to 1000 in `new`. Presumably the expansion limit — TODO confirm.
    max_expand: usize,
}
66
67/// Scoped macro namespace supporting group nesting.
68struct MacroNamespace {
69    current: HashMap<String, MacroDefinition>,
70    group_stack: Vec<HashMap<String, Option<MacroDefinition>>>,
71}
72
73impl MacroNamespace {
74    fn new() -> Self {
75        Self {
76            current: HashMap::new(),
77            group_stack: Vec::new(),
78        }
79    }
80
81    fn get(&self, name: &str) -> Option<&MacroDefinition> {
82        self.current.get(name)
83    }
84
85    fn set(&mut self, name: String, def: MacroDefinition) {
86        if let Some(undo) = self.group_stack.last_mut() {
87            undo.entry(name.clone()).or_insert_with(|| self.current.get(&name).cloned());
88        }
89        self.current.insert(name, def);
90    }
91
92    fn set_global(&mut self, name: String, def: MacroDefinition) {
93        self.current.insert(name, def);
94    }
95
96    fn has(&self, name: &str) -> bool {
97        self.current.contains_key(name)
98    }
99
100    fn begin_group(&mut self) {
101        self.group_stack.push(HashMap::new());
102    }
103
104    fn end_group(&mut self) {
105        if let Some(undo) = self.group_stack.pop() {
106            for (name, old_val) in undo {
107                match old_val {
108                    Some(def) => { self.current.insert(name, def); }
109                    None => { self.current.remove(&name); }
110                }
111            }
112        }
113    }
114
115    fn end_groups(&mut self) {
116        while !self.group_stack.is_empty() {
117            self.end_group();
118        }
119    }
120}
121
122impl<'a> MacroExpander<'a> {
123    pub fn new(input: &'a str, mode: Mode) -> Self {
124        let mut me = Self {
125            lexer: Lexer::new(input),
126            mode,
127            stack: Vec::new(),
128            macros: MacroNamespace::new(),
129            expansion_count: 0,
130            max_expand: 1000,
131        };
132        me.load_builtins();
133        me
134    }
135
136    fn load_builtins(&mut self) {
137        let builtins: &[(&str, &str)] = &[
138            // ── Grouping ──
139            ("\\bgroup", "{"),
140            ("\\egroup", "}"),
141
142            // ── Symbols from latex.ltx ──
143            ("\\lq", "`"),
144            ("\\rq", "'"),
145            // \lbrack and \rbrack are in the symbol table directly
146            ("\\aa", "\\r a"),
147            ("\\AA", "\\r A"),
148
149            // ── Active characters ──
150            ("~", "\\nobreakspace"),
151
152            // ── Phantoms ──
153            ("\\hphantom", "\\smash{\\phantom{#1}}"),
154
155            // ── Negated symbols ──
156            ("\\not", "\\html@mathml{\\mathrel{\\mathrlap\\@not}\\nobreak}{\\char\"338}"),
157            ("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}"),
158            ("\\ne", "\\neq"),
159            ("\u{2260}", "\\neq"),
160            ("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}{\\mathrel{\\char`∉}}"),
161            ("\u{2209}", "\\notin"),
162            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
163            ("\u{220C}", "\\notni"),
164            // \le and \ge are in the symbol table directly, not macros
165
166            // ── amsmath iff/implies ──
167            ("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"),
168            ("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"),
169            ("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"),
170
171            // ── Italic Greek capitals ──
172            ("\\varGamma", "\\mathit{\\Gamma}"),
173            ("\\varDelta", "\\mathit{\\Delta}"),
174            ("\\varTheta", "\\mathit{\\Theta}"),
175            ("\\varLambda", "\\mathit{\\Lambda}"),
176            ("\\varXi", "\\mathit{\\Xi}"),
177            ("\\varPi", "\\mathit{\\Pi}"),
178            ("\\varSigma", "\\mathit{\\Sigma}"),
179            ("\\varUpsilon", "\\mathit{\\Upsilon}"),
180            ("\\varPhi", "\\mathit{\\Phi}"),
181            ("\\varPsi", "\\mathit{\\Psi}"),
182            ("\\varOmega", "\\mathit{\\Omega}"),
183
184            // ── Spacing (mode-aware via \TextOrMath) ──
185            ("\\,", "\\TextOrMath{\\kern{.1667em}}{\\mskip{3mu}}"),
186            ("\\thinspace", "\\,"),
187            ("\\>", "\\mskip{4mu}"),
188            ("\\:", "\\TextOrMath{\\kern{.2222em}}{\\mskip{4mu}}"),
189            ("\\medspace", "\\:"),
190            ("\\;", "\\TextOrMath{\\kern{.2777em}}{\\mskip{5mu}}"),
191            ("\\thickspace", "\\;"),
192            ("\\!", "\\TextOrMath{\\kern{-.1667em}}{\\mskip{-3mu}}"),
193            ("\\negthinspace", "\\!"),
194            ("\\negmedspace", "\\TextOrMath{\\kern{-.2222em}}{\\mskip{-4mu}}"),
195            ("\\negthickspace", "\\TextOrMath{\\kern{-.2777em}}{\\mskip{-5mu}}"),
196            ("\\enspace", "\\kern.5em "),
197            ("\\enskip", "\\hskip.5em\\relax"),
198            ("\\quad", "\\hskip1em\\relax"),
199            ("\\qquad", "\\hskip2em\\relax"),
200
201            // ── Newline ──
202            ("\\newline", "\\\\\\relax"),
203
204            // ── hspace ──
205            ("\\@hspace", "\\hskip #1\\relax"),
206            ("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"),
207
208            // ── llap / rlap / clap ──
209            ("\\llap", "\\mathllap{\\textrm{#1}}"),
210            ("\\rlap", "\\mathrlap{\\textrm{#1}}"),
211            ("\\clap", "\\mathclap{\\textrm{#1}}"),
212
213            // ── Logos ──
214            ("\\TeX", "\\textrm{\\html@mathml{T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX}{TeX}}"),
215            ("\\LaTeX", "\\textrm{\\html@mathml{L\\kern-.36em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{LaTeX}}"),
216            ("\\KaTeX", "\\textrm{\\html@mathml{K\\kern-.17em\\raisebox{0.21em}{\\scriptstyle A}\\kern-.15em\\TeX}{KaTeX}}"),
217
218            // ── imath / jmath ──
219            ("\\imath", "\\html@mathml{\\@imath}{\u{0131}}"),
220            ("\\jmath", "\\html@mathml{\\@jmath}{\u{0237}}"),
221
222            // ── minuso ──
223            ("\\minuso", "\\mathbin{\\html@mathml{{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}{\\char`\u{29B5}}}"),
224            ("\\clap", "\\mathclap{\\textrm{#1}}"),
225
226            // ── mathstrut / underbar ──
227            ("\\mathstrut", "\\vphantom{(}"),
228            ("\\underbar", "\\underline{\\text{#1}}"),
229
230            // ── Bbbk ──
231            ("\\Bbbk", "\\Bbb{k}"),
232
233            // ── substack ──
234            ("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"),
235
236            // ── boxed ──
237            ("\\boxed", "\\fbox{$\\displaystyle{#1}$}"),
238
239            // ── colon ──
240            ("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"),
241
242            // ── dots (string-based) ──
243            ("\\dots", "\\cdots"),
244            ("\\cdots", "\\@cdots"),
245            ("\\dotsb", "\\cdots"),
246            ("\\dotsm", "\\cdots"),
247            ("\\dotsi", "\\!\\cdots"),
248            ("\\dotsx", "\\ldots\\,"),
249            ("\\dotsc", "\\ldots"),  // comma list: x,\dotsc,y
250            ("\\dotso", "\\ldots"),  // other
251            ("\\DOTSI", "\\relax"),
252            ("\\DOTSB", "\\relax"),
253            ("\\DOTSX", "\\relax"),
254
255            // ── negated relations / corners (→ symbol table \@xxx) ──
256            ("\\gvertneqq", "\\@gvertneqq"),
257            ("\\lvertneqq", "\\@lvertneqq"),
258            ("\\ngeqq", "\\@ngeqq"),
259            ("\\ngeqslant", "\\@ngeqslant"),
260            ("\\nleqq", "\\@nleqq"),
261            ("\\nleqslant", "\\@nleqslant"),
262            ("\\nshortmid", "\\@nshortmid"),
263            ("\\nshortparallel", "\\@nshortparallel"),
264            ("\\nsubseteqq", "\\@nsubseteqq"),
265            ("\\nsupseteqq", "\\@nsupseteqq"),
266            ("\\ulcorner", "\\@ulcorner"),
267            ("\\urcorner", "\\@urcorner"),
268            ("\\llcorner", "\\@llcorner"),
269            ("\\lrcorner", "\\@lrcorner"),
270            ("\\varsubsetneq", "\\@varsubsetneq"),
271            ("\\varsubsetneqq", "\\@varsubsetneqq"),
272            ("\\varsupsetneq", "\\@varsupsetneq"),
273            ("\\varsupsetneqq", "\\@varsupsetneqq"),
274
275            // ── delimiters / text (compose from existing) ──
276            ("\\lBrace", "\\lbrace"),
277            ("\\rBrace", "\\rbrace"),
278            ("\\llbracket", "\\lbrack\\lbrack"),
279            ("\\rrbracket", "\\rbrack\\rbrack"),
280            ("\\copyright", "\\text{\u{00A9}}"),
281            ("\\textregistered", "\u{00AE}"),
282
283            // ── dddot / ddddot ──
284            ("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"),
285            ("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"),
286
287            // ── vdots ──
288            ("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}"),
289            ("\u{22ee}", "\\vdots"),
290
291            // ── bmod / pod / pmod / mod ──
292            ("\\bmod", "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}\\mathbin{\\rm mod}\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}"),
293            ("\\pod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)"),
294            ("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}"),
295            ("\\mod", "\\allowbreak\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}{\\rm mod}\\,\\,#1"),
296
297            // ── limsup / liminf / etc ──
298            ("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"),
299            ("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"),
300            ("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"),
301            ("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"),
302            ("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{lim}}"),
303            ("\\varliminf", "\\DOTSB\\operatorname*{\\underline{lim}}"),
304            ("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{lim}}"),
305            ("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{lim}}"),
306
307            // ── statmath ──
308            ("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"),
309            ("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"),
310            ("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"),
311
312            // ── mathtools colon variants ──
313            ("\\ordinarycolon", ":"),
314            ("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"),
315            ("\\dblcolon", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}{\\mathop{\\char\"2237}}"),
316            ("\\coloneqq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2254}}"),
317            ("\\Coloneqq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}{\\mathop{\\char\"2237\\char\"3d}}"),
318            ("\\coloneq", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"3a\\char\"2212}}"),
319            ("\\Coloneq", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}{\\mathop{\\char\"2237\\char\"2212}}"),
320            ("\\eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2255}}"),
321            ("\\Eqqcolon", "\\html@mathml{\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"3d\\char\"2237}}"),
322            ("\\eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}{\\mathop{\\char\"2239}}"),
323            ("\\Eqcolon", "\\html@mathml{\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}{\\mathop{\\char\"2212\\char\"2237}}"),
324            ("\\colonapprox", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"3a\\char\"2248}}"),
325            ("\\Colonapprox", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}{\\mathop{\\char\"2237\\char\"2248}}"),
326            ("\\colonsim", "\\html@mathml{\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"3a\\char\"223c}}"),
327            ("\\Colonsim", "\\html@mathml{\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}{\\mathop{\\char\"2237\\char\"223c}}"),
328
329            // ── colonequals alternate names ──
330            ("\\ratio", "\\vcentcolon"),
331            ("\\coloncolon", "\\dblcolon"),
332            ("\\colonequals", "\\coloneqq"),
333            ("\\coloncolonequals", "\\Coloneqq"),
334            ("\\equalscolon", "\\eqqcolon"),
335            ("\\equalscoloncolon", "\\Eqqcolon"),
336            ("\\colonminus", "\\coloneq"),
337            ("\\coloncolonminus", "\\Coloneq"),
338            ("\\minuscolon", "\\eqcolon"),
339            ("\\minuscoloncolon", "\\Eqcolon"),
340            ("\\coloncolonapprox", "\\Colonapprox"),
341            ("\\coloncolonsim", "\\Colonsim"),
342            ("\\simcolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
343            ("\\simcoloncolon", "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
344            ("\\approxcolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"),
345            ("\\approxcoloncolon", "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"),
346
347            // ── braket (string-based) ──
348            ("\\bra", "\\mathinner{\\langle{#1}|}"),
349            ("\\ket", "\\mathinner{|{#1}\\rangle}"),
350            ("\\braket", "\\mathinner{\\langle{#1}\\rangle}"),
351            ("\\Braket", "\\mathinner{\\langle{#1}\\rangle}"),
352            ("\\Bra", "\\left\\langle#1\\right|"),
353            ("\\Ket", "\\left|#1\\right\\rangle"),
354
355            // ── texvc (MediaWiki) ──
356            ("\\darr", "\\downarrow"),
357            ("\\dArr", "\\Downarrow"),
358            ("\\Darr", "\\Downarrow"),
359            ("\\lang", "\\langle"),
360            ("\\rang", "\\rangle"),
361            ("\\uarr", "\\uparrow"),
362            ("\\uArr", "\\Uparrow"),
363            ("\\Uarr", "\\Uparrow"),
364            ("\\N", "\\mathbb{N}"),
365            ("\\R", "\\mathbb{R}"),
366            ("\\Z", "\\mathbb{Z}"),
367            ("\\alef", "\\aleph"),
368            ("\\alefsym", "\\aleph"),
369            ("\\Alpha", "\\mathrm{A}"),
370            ("\\Beta", "\\mathrm{B}"),
371            ("\\bull", "\\bullet"),
372            ("\\Chi", "\\mathrm{X}"),
373            ("\\clubs", "\\clubsuit"),
374            ("\\cnums", "\\mathbb{C}"),
375            ("\\Complex", "\\mathbb{C}"),
376            ("\\Dagger", "\\ddagger"),
377            ("\\diamonds", "\\diamondsuit"),
378            ("\\empty", "\\emptyset"),
379            ("\\Epsilon", "\\mathrm{E}"),
380            ("\\Eta", "\\mathrm{H}"),
381            ("\\exist", "\\exists"),
382            ("\\harr", "\\leftrightarrow"),
383            ("\\hArr", "\\Leftrightarrow"),
384            ("\\Harr", "\\Leftrightarrow"),
385            ("\\hearts", "\\heartsuit"),
386            ("\\image", "\\Im"),
387            ("\\infin", "\\infty"),
388            ("\\Iota", "\\mathrm{I}"),
389            ("\\isin", "\\in"),
390            ("\\Kappa", "\\mathrm{K}"),
391            ("\\larr", "\\leftarrow"),
392            ("\\lArr", "\\Leftarrow"),
393            ("\\Larr", "\\Leftarrow"),
394            ("\\lrarr", "\\leftrightarrow"),
395            ("\\lrArr", "\\Leftrightarrow"),
396            ("\\Lrarr", "\\Leftrightarrow"),
397            ("\\Mu", "\\mathrm{M}"),
398            ("\\natnums", "\\mathbb{N}"),
399            ("\\Nu", "\\mathrm{N}"),
400            ("\\Omicron", "\\mathrm{O}"),
401            ("\\plusmn", "\\pm"),
402            ("\\rarr", "\\rightarrow"),
403            ("\\rArr", "\\Rightarrow"),
404            ("\\Rarr", "\\Rightarrow"),
405            ("\\real", "\\Re"),
406            ("\\reals", "\\mathbb{R}"),
407            ("\\Reals", "\\mathbb{R}"),
408            ("\\Rho", "\\mathrm{P}"),
409            ("\\sdot", "\\cdot"),
410            ("\\sect", "\\S"),
411            ("\\spades", "\\spadesuit"),
412            ("\\sub", "\\subset"),
413            ("\\sube", "\\subseteq"),
414            ("\\supe", "\\supseteq"),
415            ("\\Tau", "\\mathrm{T}"),
416            ("\\thetasym", "\\vartheta"),
417            ("\\weierp", "\\wp"),
418            ("\\Zeta", "\\mathrm{Z}"),
419
420            // ── Khan Academy color aliases ──
421            ("\\blue", "\\textcolor{##6495ed}{#1}"),
422            ("\\orange", "\\textcolor{##ffa500}{#1}"),
423            ("\\pink", "\\textcolor{##ff00af}{#1}"),
424            ("\\red", "\\textcolor{##df0030}{#1}"),
425            ("\\green", "\\textcolor{##28ae7b}{#1}"),
426            ("\\gray", "\\textcolor{gray}{#1}"),
427            ("\\purple", "\\textcolor{##9d38bd}{#1}"),
428
429            // ── Unicode script letters ──
430            ("\u{212C}", "\\mathscr{B}"),
431            ("\u{2130}", "\\mathscr{E}"),
432            ("\u{2131}", "\\mathscr{F}"),
433            ("\u{210B}", "\\mathscr{H}"),
434            ("\u{2110}", "\\mathscr{I}"),
435            ("\u{2112}", "\\mathscr{L}"),
436            ("\u{2133}", "\\mathscr{M}"),
437            ("\u{211B}", "\\mathscr{R}"),
438            ("\u{212D}", "\\mathfrak{C}"),
439            ("\u{210C}", "\\mathfrak{H}"),
440            ("\u{2128}", "\\mathfrak{Z}"),
441
442            // ── notni ──
443            ("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u{220C}}}"),
444
445            // ── actuarialangle ──
446            ("\\angln", "{\\angl n}"),
447
448            // ── set/Set (braket notation, simplified) ──
449            ("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"),
450            ("\\Set", "\\bra@set{\\left\\{\\:}{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"),
451
452            // ── tag ──
453            ("\\tag", "\\@ifstar\\tag@literal\\tag@paren"),
454            ("\\tag@paren", "\\tag@literal{({#1})}"),
455            ("\\tag@literal", "\\gdef\\df@tag{\\text{#1}}"),
456            // ── equation numbering (display math; no-op in parser) ──
457            ("\\nonumber", "\\relax"),
458            ("\\notag", "\\relax"),
459        ];
460
461        for &(name, expansion) in builtins {
462            self.macros.set(
463                name.to_string(),
464                MacroDefinition::Text(expansion.to_string()),
465            );
466        }
467
468        self.load_function_macros();
469    }
470
471    fn load_function_macros(&mut self) {
472        // \noexpand: mark the next token as non-expandable (only if expandable)
473        self.macros.set(
474            "\\noexpand".to_string(),
475            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
476                let mut tok = me.pop_token();
477                if me.is_expandable(&tok.text) {
478                    tok.noexpand = true;
479                    tok.treat_as_relax = true;
480                }
481                Ok(vec![tok])
482            }),
483        );
484
485        // \@firstoftwo{A}{B} → A
486        // NOTE: consume_args returns tokens in stack order (reversed).
487        // We return them as-is since expand_once does stack.extend(tokens).
488        self.macros.set(
489            "\\@firstoftwo".to_string(),
490            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
491                let args = me.consume_args(2)?;
492                Ok(args.into_iter().next().unwrap())
493            }),
494        );
495
496        // \@secondoftwo{A}{B} → B
497        self.macros.set(
498            "\\@secondoftwo".to_string(),
499            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
500                let args = me.consume_args(2)?;
501                Ok(args.into_iter().nth(1).unwrap())
502            }),
503        );
504
505        // \@ifnextchar{C}{T}{F}: peek; if next non-space == C then T else F
506        self.macros.set(
507            "\\@ifnextchar".to_string(),
508            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
509                let args = me.consume_args(3)?;
510                me.consume_spaces();
511                let next = me.future().text.clone();
512                let char_arg = &args[0];
513                // char_arg is reversed; the "first" char in original order is the last element
514                let char_text = char_arg.first().map_or("", |t| t.text.as_str());
515                if next == char_text {
516                    Ok(args[1].clone())
517                } else {
518                    Ok(args[2].clone())
519                }
520            }),
521        );
522
523        // \@ifstar{with-star}{without-star}: if next is * → consume * and use first arg
524        self.macros.set(
525            "\\@ifstar".to_string(),
526            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
527                let args = me.consume_args(2)?;
528                let next = me.future().text.clone();
529                if next == "*" {
530                    me.pop_token();
531                    Ok(args[0].clone())
532                } else {
533                    Ok(args[1].clone())
534                }
535            }),
536        );
537
538        // \TextOrMath{text-branch}{math-branch}: choose based on mode
539        self.macros.set(
540            "\\TextOrMath".to_string(),
541            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
542                let args = me.consume_args(2)?;
543                if me.mode == Mode::Text {
544                    Ok(args[0].clone())
545                } else {
546                    Ok(args[1].clone())
547                }
548            }),
549        );
550
551        // \html@mathml is registered as a function in htmlmathml.rs
552
553        // \newcommand{\name}[nargs]{body}
554        self.macros.set(
555            "\\newcommand".to_string(),
556            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
557                handle_newcommand(me, false, true)
558            }),
559        );
560
561        // \renewcommand{\name}[nargs]{body}
562        self.macros.set(
563            "\\renewcommand".to_string(),
564            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
565                handle_newcommand(me, true, false)
566            }),
567        );
568
569        // \providecommand{\name}[nargs]{body}
570        self.macros.set(
571            "\\providecommand".to_string(),
572            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
573                handle_newcommand(me, true, true)
574            }),
575        );
576
577        // \char: parse decimal/octal/hex/backtick number → \@char{N}
578        self.macros.set(
579            "\\char".to_string(),
580            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
581                let mut tok = me.pop_token();
582                let mut number: i64;
583                let base: Option<u32>;
584
585                if tok.text == "'" {
586                    base = Some(8);
587                    tok = me.pop_token();
588                } else if tok.text == "\"" {
589                    base = Some(16);
590                    tok = me.pop_token();
591                } else if tok.text == "`" {
592                    tok = me.pop_token();
593                    if tok.text.starts_with('\\') {
594                        number = tok.text.chars().nth(1).map_or(0, |c| c as i64);
595                    } else {
596                        number = tok.text.chars().next().map_or(0, |c| c as i64);
597                    }
598                    // Build \@char{N} tokens in reverse (stack order)
599                    let s = number.to_string();
600                    let loc = tok.loc.clone();
601                    let mut result = vec![Token::new("}", loc.start, loc.end)];
602                    for ch in s.chars().rev() {
603                        result.push(Token::new(ch.to_string(), loc.start, loc.end));
604                    }
605                    result.push(Token::new("{", loc.start, loc.end));
606                    result.push(Token::new("\\@char", loc.start, loc.end));
607                    return Ok(result);
608                } else {
609                    base = Some(10);
610                }
611
612                if let Some(b) = base {
613                    number = i64::from_str_radix(&tok.text, b).unwrap_or(0);
614                    loop {
615                        let next = me.future().text.clone();
616                        if let Ok(d) = i64::from_str_radix(&next, b) {
617                            me.pop_token();
618                            number = number * (b as i64) + d;
619                        } else {
620                            break;
621                        }
622                    }
623                } else {
624                    number = 0;
625                }
626
627                let s = number.to_string();
628                let loc = tok.loc.clone();
629                let mut result = vec![Token::new("}", loc.start, loc.end)];
630                for ch in s.chars().rev() {
631                    result.push(Token::new(ch.to_string(), loc.start, loc.end));
632                }
633                result.push(Token::new("{", loc.start, loc.end));
634                result.push(Token::new("\\@char", loc.start, loc.end));
635                Ok(result)
636            }),
637        );
638
639        // \operatorname: \@ifstar\operatornamewithlimits\operatorname@
640        self.macros.set(
641            "\\operatorname".to_string(),
642            MacroDefinition::Text(
643                "\\@ifstar\\operatornamewithlimits\\operatorname@".to_string(),
644            ),
645        );
646
647        // \message{...}: consume argument and discard (no-op)
648        self.macros.set(
649            "\\message".to_string(),
650            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
651                let _args = me.consume_args(1)?;
652                Ok(vec![])
653            }),
654        );
655
656        // \errmessage{...}: consume argument and discard (no-op)
657        self.macros.set(
658            "\\errmessage".to_string(),
659            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
660                let _args = me.consume_args(1)?;
661                Ok(vec![])
662            }),
663        );
664
665        // KaTeX HTML extensions: no-op (only render content, no HTML attributes).
666        // Not standard LaTeX; for compatibility we parse and expand to second argument only.
667        for name in &["\\htmlClass", "\\htmlData", "\\htmlId", "\\htmlStyle"] {
668            let name = (*name).to_string();
669            self.macros.set(
670                name.clone(),
671                MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
672                    let args = me.consume_args(2)?;
673                    let content = args[1].iter().cloned().rev().collect::<Vec<_>>();
674                    Ok(content)
675                }),
676            );
677        }
678
679        // \bra@set: braket set notation helper
680        // Only replaces the FIRST | with middle tokens (one-shot), matching KaTeX
681        self.macros.set(
682            "\\bra@set".to_string(),
683            MacroDefinition::Function(|me: &mut MacroExpander| -> ParseResult<Vec<Token>> {
684                let args = me.consume_args(4)?;
685                let left = args[0].clone();
686                let middle = args[1].clone();
687                let middle_double = args[2].clone();
688                let right = args[3].clone();
689
690                let content = me.consume_args(1)?;
691                let mut content = content.into_iter().next().unwrap();
692
693                // Scan content and replace only the first | at depth 0
694                // Content tokens are in reversed order (stack), so iterate from end
695                let mut depth: i32 = 0;
696                let mut _first_pipe_idx: Option<usize> = None;
697                // Tokens are reversed (last token first in vec), scan in logical order
698                for i in (0..content.len()).rev() {
699                    let t = &content[i];
700                    if t.text == "{" { depth += 1; }
701                    else if t.text == "}" { depth -= 1; }
702                    else if depth == 0 && t.text == "|" {
703                        // Check for || (double pipe) → middleDouble
704                        if !middle_double.is_empty() && i > 0 && content[i - 1].text == "|" {
705                            _first_pipe_idx = Some(i);
706                            // Replace || with middleDouble
707                            content.remove(i);
708                            content.remove(i - 1);
709                            let insert_at = if i >= 2 { i - 1 } else { 0 };
710                            for (j, tok) in middle_double.iter().enumerate() {
711                                content.insert(insert_at + j, tok.clone());
712                            }
713                            break;
714                        }
715                        _first_pipe_idx = Some(i);
716                        content.remove(i);
717                        for (j, tok) in middle.iter().enumerate() {
718                            content.insert(i + j, tok.clone());
719                        }
720                        break;
721                    }
722                }
723
724                // Build: right + content + left (reversed for stack)
725                let mut to_expand = Vec::new();
726                to_expand.extend(right);
727                to_expand.extend(content);
728                to_expand.extend(left);
729
730                me.begin_group();
731                let expanded = me.expand_tokens(to_expand)?;
732                me.end_group();
733
734                Ok(expanded)
735            }),
736        );
737    }
738
    /// Stores `def` as the definition of the macro `name`.
    ///
    /// Delegates to `self.macros.set`.
    /// NOTE(review): presumably this affects only the current group scope
    /// (compare `set_macro_global`) — confirm against the namespace type.
    pub fn set_macro(&mut self, name: String, def: MacroDefinition) {
        self.macros.set(name, def);
    }
742
    /// Stores `def` as the definition of the macro `name` via `self.macros.set_global`.
    ///
    /// NOTE(review): presumably the definition survives `end_group`, as with TeX's
    /// `\global` — confirm against the namespace type.
    pub fn set_macro_global(&mut self, name: String, def: MacroDefinition) {
        self.macros.set_global(name, def);
    }
746
747    pub fn set_text_macro(&mut self, name: &str, text: &str) {
748        self.macros.set(
749            name.to_string(),
750            MacroDefinition::Text(text.to_string()),
751        );
752    }
753
    /// Looks up the definition stored for `name`.
    ///
    /// Returns whatever `self.macros.get` yields: a borrowed definition, or `None`
    /// when the lookup finds nothing.
    pub fn get_macro(&self, name: &str) -> Option<&MacroDefinition> {
        self.macros.get(name)
    }
757
758    /// Expand a list of tokens fully (for \edef/\xdef).
759    pub fn expand_tokens(&mut self, tokens: Vec<Token>) -> ParseResult<Vec<Token>> {
760        let saved_stack = std::mem::take(&mut self.stack);
761        self.stack = tokens;
762
763        let mut result = Vec::new();
764        loop {
765            if self.stack.is_empty() {
766                break;
767            }
768            let expanded = self.expand_once(false)?;
769            if !expanded {
770                if let Some(tok) = self.stack.pop() {
771                    if tok.is_eof() {
772                        break;
773                    }
774                    result.push(tok);
775                }
776            }
777        }
778
779        self.stack = saved_stack;
780        result.reverse();
781        Ok(result)
782    }
783
    /// Replaces the expander's current mode with `new_mode`.
    pub fn switch_mode(&mut self, new_mode: Mode) {
        self.mode = new_mode;
    }
787
    /// Opens a new macro group by delegating to `self.macros.begin_group`.
    ///
    /// NOTE(review): presumably definitions made after this call are undone by the
    /// matching `end_group` — confirm against the namespace type.
    pub fn begin_group(&mut self) {
        self.macros.begin_group();
    }
791
    /// Closes the innermost macro group by delegating to `self.macros.end_group`.
    ///
    /// NOTE(review): behaviour when no group is open is decided by the namespace
    /// type — confirm there.
    pub fn end_group(&mut self) {
        self.macros.end_group();
    }
795
    /// Delegates to `self.macros.end_groups`.
    ///
    /// NOTE(review): presumably closes every group that is still open — confirm
    /// against the namespace type.
    pub fn end_groups(&mut self) {
        self.macros.end_groups();
    }
799
800    /// Returns the topmost token on the stack, without expanding it.
801    pub fn future(&mut self) -> &Token {
802        if self.stack.is_empty() {
803            let tok = self.lexer.lex();
804            self.stack.push(tok);
805        }
806        self.stack.last().unwrap()
807    }
808
809    /// Remove and return the next unexpanded token.
810    pub fn pop_token(&mut self) -> Token {
811        self.future();
812        self.stack.pop().unwrap()
813    }
814
815    /// Modify the top token's text on the stack (for \global prefix handling).
816    pub fn set_top_text(&mut self, text: String) {
817        self.future();
818        if let Some(tok) = self.stack.last_mut() {
819            tok.text = text;
820        }
821    }
822
    /// Push a token onto the stack.
    ///
    /// The pushed token becomes the top of the stack, i.e. the next token that
    /// `future` and `pop_token` will see.
    pub fn push_token(&mut self, token: Token) {
        self.stack.push(token);
    }
827
    /// Push multiple tokens onto the stack.
    ///
    /// Tokens are appended in order, so the last element of `tokens` ends up on
    /// top and is read first; callers therefore pass tokens in reversed (stack) order.
    pub fn push_tokens(&mut self, tokens: Vec<Token>) {
        self.stack.extend(tokens);
    }
832
833    /// Consume all following space tokens, without expansion.
834    pub fn consume_spaces(&mut self) {
835        loop {
836            let is_space = self.future().text == " ";
837            if is_space {
838                self.stack.pop();
839            } else {
840                break;
841            }
842        }
843    }
844
845    /// Expand the next token once if possible.
846    /// Returns Ok(true) if expanded, Ok(false) if not expandable.
847    fn expand_once(&mut self, expandable_only: bool) -> ParseResult<bool> {
848        let top_token = self.pop_token();
849        let name = &top_token.text;
850
851        if top_token.noexpand {
852            self.push_token(top_token);
853            return Ok(false);
854        }
855
856        // Check for function-based macro first — always expandable
857        if let Some(MacroDefinition::Function(handler)) = self.macros.get(name).cloned() {
858            self.count_expansion(1)?;
859            let tokens = handler(self)?;
860            self.stack.extend(tokens);
861            return Ok(true);
862        }
863
864        let expansion = self.get_expansion(name);
865        match expansion {
866            None => {
867                if expandable_only && name.starts_with('\\') && !self.is_defined(name) {
868                    return Err(ParseError::new(
869                        format!("Undefined control sequence: {}", name),
870                        Some(&top_token),
871                    ));
872                }
873                self.push_token(top_token);
874                Ok(false)
875            }
876            Some(exp) if expandable_only && exp.unexpandable => {
877                self.push_token(top_token);
878                Ok(false)
879            }
880            Some(exp) => {
881                self.count_expansion(1)?;
882                let mut tokens = exp.tokens;
883                if exp.num_args > 0 {
884                    let args = self.consume_args(exp.num_args)?;
885                    tokens = self.substitute_args(tokens, &args);
886                }
887                self.stack.extend(tokens);
888                Ok(true)
889            }
890        }
891    }
892
893    fn substitute_args(&self, mut tokens: Vec<Token>, args: &[Vec<Token>]) -> Vec<Token> {
894        let mut i = tokens.len();
895        while i > 0 {
896            i -= 1;
897            if tokens[i].text == "#" && i > 0 {
898                let next = &tokens[i - 1];
899                if next.text == "#" {
900                    tokens.remove(i);
901                    i -= 1;
902                } else if let Ok(n) = next.text.parse::<usize>() {
903                    if n >= 1 && n <= args.len() {
904                        tokens.remove(i);
905                        tokens.remove(i - 1);
906                        let arg_tokens = &args[n - 1];
907                        for (j, t) in arg_tokens.iter().enumerate() {
908                            tokens.insert(i - 1 + j, t.clone());
909                        }
910                        i = i.saturating_sub(1);
911                    }
912                }
913            }
914        }
915        tokens
916    }
917
918    fn get_expansion(&self, name: &str) -> Option<MacroExpansion> {
919        let def = self.macros.get(name)?;
920
921        if name.len() == 1 {
922            let ch = name.chars().next().unwrap();
923            let catcode = self.lexer_catcode(ch);
924            if catcode != 0 && catcode != 13 {
925                return None;
926            }
927        }
928
929        match def {
930            MacroDefinition::Text(text) => {
931                let mut num_args = 0;
932                let stripped = text.replace("##", "");
933                while stripped.contains(&format!("#{}", num_args + 1)) {
934                    num_args += 1;
935                }
936                let mut body_lexer = Lexer::new(text);
937                let mut tokens = Vec::new();
938                loop {
939                    let tok = body_lexer.lex();
940                    if tok.is_eof() {
941                        break;
942                    }
943                    tokens.push(tok);
944                }
945                tokens.reverse();
946                Some(MacroExpansion {
947                    tokens,
948                    num_args,
949                    unexpandable: false,
950                })
951            }
952            MacroDefinition::Tokens { tokens, num_args } => Some(MacroExpansion {
953                tokens: tokens.clone(),
954                num_args: *num_args,
955                unexpandable: false,
956            }),
957            MacroDefinition::Function(_) => {
958                // Signal that this is a function macro; handled in expand_once
959                Some(MacroExpansion {
960                    tokens: vec![],
961                    num_args: 0,
962                    unexpandable: false,
963                })
964            }
965        }
966    }
967
    /// Returns the category code the lexer reports for `ch` (`self.lexer.get_catcode`).
    fn lexer_catcode(&self, ch: char) -> u8 {
        self.lexer.get_catcode(ch)
    }
971
972    fn count_expansion(&mut self, amount: usize) -> ParseResult<()> {
973        self.expansion_count += amount;
974        if self.expansion_count > self.max_expand {
975            Err(ParseError::msg(
976                "Too many expansions: infinite loop or need to increase maxExpand setting",
977            ))
978        } else {
979            Ok(())
980        }
981    }
982
983    /// Recursively expand the next token until a non-expandable token is found.
984    pub fn expand_next_token(&mut self) -> ParseResult<Token> {
985        loop {
986            let expanded = self.expand_once(false)?;
987            if !expanded {
988                let mut token = self.stack.pop().unwrap();
989                if token.treat_as_relax {
990                    token.text = "\\relax".to_string();
991                }
992                return Ok(token);
993            }
994        }
995    }
996
997    /// Consume a single argument from the token stream.
998    pub fn consume_arg(&mut self, delims: Option<&[&str]>) -> ParseResult<ConsumedArg> {
999        let is_delimited = delims.is_some_and(|d| !d.is_empty());
1000        if !is_delimited {
1001            self.consume_spaces();
1002        }
1003
1004        let start = self.future().clone();
1005        let mut tokens = Vec::new();
1006        let mut depth: i32 = 0;
1007        let mut end_tok;
1008
1009        loop {
1010            let tok = self.pop_token();
1011            end_tok = tok.clone();
1012            tokens.push(tok.clone());
1013
1014            if tok.text == "{" {
1015                depth += 1;
1016            } else if tok.text == "}" {
1017                depth -= 1;
1018                if depth == -1 {
1019                    return Err(ParseError::new("Extra }", Some(&tok)));
1020                }
1021            } else if tok.is_eof() {
1022                return Err(ParseError::new(
1023                    "Unexpected end of input in a macro argument",
1024                    Some(&tok),
1025                ));
1026            }
1027
1028            if depth == 0 && !is_delimited {
1029                break;
1030            }
1031
1032            if let Some(delims) = delims {
1033                if is_delimited && depth == 0 {
1034                    if let Some(last) = delims.last() {
1035                        if tok.text == *last {
1036                            tokens.pop();
1037                            break;
1038                        }
1039                    }
1040                }
1041            }
1042        }
1043
1044        if start.text == "{" && tokens.last().is_some_and(|t| t.text == "}") {
1045            tokens.pop();
1046            tokens.remove(0);
1047        }
1048
1049        tokens.reverse();
1050
1051        Ok(ConsumedArg {
1052            tokens,
1053            start,
1054            end: end_tok,
1055        })
1056    }
1057
1058    /// Consume N arguments.
1059    fn consume_args(&mut self, num_args: usize) -> ParseResult<Vec<Vec<Token>>> {
1060        let mut args = Vec::with_capacity(num_args);
1061        for _ in 0..num_args {
1062            let arg = self.consume_arg(None)?;
1063            args.push(arg.tokens);
1064        }
1065        Ok(args)
1066    }
1067
1068    /// Scan a function argument (optional or mandatory).
1069    /// Pushes an EOF token to mark the end, then pushes the argument tokens.
1070    pub fn scan_argument(&mut self, is_optional: bool) -> ParseResult<Option<Token>> {
1071        if is_optional {
1072            self.consume_spaces();
1073            if self.future().text != "[" {
1074                return Ok(None);
1075            }
1076            let start = self.pop_token();
1077            let arg = self.consume_arg(Some(&["]"]))?;
1078            let end = &arg.end;
1079            let end_loc = end.loc.clone();
1080
1081            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1082            self.push_tokens(arg.tokens);
1083
1084            let result = Token {
1085                text: String::new(),
1086                loc: SourceLocation::range(&start.loc, &end_loc),
1087                noexpand: false,
1088                treat_as_relax: false,
1089            };
1090            Ok(Some(result))
1091        } else {
1092            let arg = self.consume_arg(None)?;
1093            let end_loc = arg.end.loc.clone();
1094
1095            self.push_token(Token::new("EOF", end_loc.start, end_loc.end));
1096            self.push_tokens(arg.tokens);
1097
1098            let result = Token {
1099                text: String::new(),
1100                loc: SourceLocation::range(&arg.start.loc, &end_loc),
1101                noexpand: false,
1102                treat_as_relax: false,
1103            };
1104            Ok(Some(result))
1105        }
1106    }
1107
1108    /// Check if a command name is currently defined.
1109    pub fn is_defined(&self, name: &str) -> bool {
1110        self.macros.has(name)
1111            || FUNCTIONS.contains_key(name)
1112            || is_known_symbol(name)
1113            || IMPLICIT_COMMANDS.contains(&name)
1114    }
1115
1116    /// Check if a command is expandable.
1117    pub fn is_expandable(&self, name: &str) -> bool {
1118        if let Some(_def) = self.macros.get(name) {
1119            return true;
1120        }
1121        if let Some(func) = FUNCTIONS.get(name) {
1122            return !func.primitive;
1123        }
1124        false
1125    }
1126}
1127
/// The result of `MacroExpander::consume_arg`: one macro argument plus the tokens
/// that bounded it.
pub struct ConsumedArg {
    /// Argument tokens in reversed (stack) order, with an enclosing `{`/`}` pair
    /// and any closing delimiter already removed.
    pub tokens: Vec<Token>,
    /// The first token of the argument, as peeked before anything was consumed.
    pub start: Token,
    /// The last token consumed while reading the argument (the closing delimiter
    /// for a delimited argument).
    pub end: Token,
}
1133
1134fn handle_newcommand(
1135    me: &mut MacroExpander,
1136    exists_ok: bool,
1137    nonexists_ok: bool,
1138) -> ParseResult<Vec<Token>> {
1139    let name_arg = me.consume_arg(None)?;
1140    // name_arg.tokens is reversed (stack order); last element = first token in original
1141    let name = name_arg.tokens.last().map_or_else(String::new, |t| t.text.clone());
1142
1143    let exists = me.is_defined(&name);
1144    if exists && !exists_ok {
1145        return Err(ParseError::msg(format!(
1146            "\\newcommand{{{}}} attempting to redefine {}; use \\renewcommand",
1147            name, name
1148        )));
1149    }
1150    if !exists && !nonexists_ok {
1151        return Err(ParseError::msg(format!(
1152            "\\renewcommand{{{}}} when command {} does not yet exist; use \\newcommand",
1153            name, name
1154        )));
1155    }
1156
1157    me.consume_spaces();
1158    let mut num_args = 0usize;
1159    if me.future().text == "[" {
1160        me.pop_token();
1161        let narg_tok = me.pop_token();
1162        num_args = narg_tok.text.parse().unwrap_or(0);
1163        let close = me.pop_token();
1164        if close.text != "]" {
1165            return Err(ParseError::msg("Expected ] in \\newcommand"));
1166        }
1167    }
1168
1169    let body_arg = me.consume_arg(None)?;
1170    let tokens = body_arg.tokens;
1171
1172    me.set_macro(name, MacroDefinition::Tokens { tokens, num_args });
1173    Ok(vec![])
1174}
1175
1176fn is_known_symbol(name: &str) -> bool {
1177    use ratex_font::symbols;
1178    symbols::get_symbol(name, symbols::Mode::Math).is_some()
1179        || symbols::get_symbol(name, symbols::Mode::Text).is_some()
1180}